From be6d42ef636e8e1c9fdcfa6f8738ee10e885d75b Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 18 Oct 2017 10:17:59 +0200 Subject: Version 1.4 --- CakeOlsFit.R | 326 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 326 insertions(+) create mode 100644 CakeOlsFit.R (limited to 'CakeOlsFit.R') diff --git a/CakeOlsFit.R b/CakeOlsFit.R new file mode 100644 index 0000000..75ac471 --- /dev/null +++ b/CakeOlsFit.R @@ -0,0 +1,326 @@ +# Originally: mkinfit.R 87 2010-12-09 07:31:59Z jranke $ + +# Based on code in mkinfit +# Portions Johannes Ranke 2010 +# Contact: mkin-devel@lists.berlios.de +# The summary function is an adapted and extended version of summary.modFit +# from the FME package, v 1.1 by Soetart and Petzoldt, which was in turn +# inspired by summary.nls.lm + +#$Id: CakeOlsFit.R 216 2011-07-05 14:35:03Z nelr $ +# This version has been modified to expect SFO parameterised as k and flow fractions +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Authors: Rob Nelson, Richard Smith +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .” + +CakeOlsFit <- function(mkinmod, observed, + parms.ini = rep(0.1, length(mkinmod$parms)), + state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), + lower = 0, upper = Inf, + fixed_parms = NULL, + fixed_initials = names(mkinmod$diffs)[-1], + eigen = TRUE, + plot = FALSE, quiet = FALSE, + err = NULL, weight = "none", scaleVar = FALSE, + atol = 1e-6, + ...) +{ + mod_vars <- names(mkinmod$diffs) + # Subset dataframe with mapped (modelled) variables + observed <- subset(observed, name %in% names(mkinmod$map)) + # Get names of observed variables + obs_vars = unique(as.character(observed$name)) + + # Name the parameters if they are not named yet + if(is.null(names(parms.ini))) names(parms.ini) <- mkinmod$parms + + # Name the inital parameter values if they are not named yet + if(is.null(names(state.ini))) names(state.ini) <- mod_vars + + # Parameters to be optimised + parms.fixed <- parms.ini[fixed_parms] + optim_parms <- setdiff(names(parms.ini), fixed_parms) + parms.optim <- parms.ini[optim_parms] + + state.ini.fixed <- state.ini[fixed_initials] + optim_initials <- setdiff(names(state.ini), fixed_initials) + state.ini.optim <- state.ini[optim_initials] + state.ini.optim.boxnames <- names(state.ini.optim) + if(length(state.ini.optim) > 0) { + names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") + } + + # Decide if the solution of the model can be based on a simple analytical + # formula, the spectral decomposition of the matrix (fundamental system) + # or a numeric ode solver from the deSolve package + if (length(mkinmod$map) == 1) { + solution = "analytical" + } else { + if (is.matrix(mkinmod$coefmat) & eigen) solution = "eigen" + else solution = "deSolve" + } + + # Create a function calculating the differentials specified by the model + # if necessary + if(solution == "deSolve") { + mkindiff <- function(t, state, parms) { + time <- t + diffs <- vector() + for (box in mod_vars) + { + diffname <- paste("d", box, sep="_") + diffs[diffname] <- with(as.list(c(time,state, parms)), + eval(parse(text=mkinmod$diffs[[box]]))) + } + return(list(c(diffs))) + } + } + + cost.old <- 1e100 + calls <- 0 + out_predicted <- NA + # Define the model cost function + cost <- function(P) + { + assign("calls", calls+1, inherits=TRUE) + if(length(state.ini.optim) > 0) { + odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) + names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) + } else odeini <- state.ini.fixed + + odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed) + + # Ensure initial state is at time 0 + outtimes = unique(c(0,observed$time)) + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } + + # Solve the system + if (solution == "analytical") { + parent.type = names(mkinmod$map[[1]])[1] + parent.name = names(mkinmod$diffs)[[1]] + o <- switch(parent.type, + SFO = SFO.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, sep="_"))), +# evalparse("k")), +# evalparse(paste("k", parent.name, "sink", sep="_"))), + FOMC = FOMC.solution(outtimes, + evalparse(parent.name), + evalparse("alpha"), evalparse("beta")), + DFOP = DFOP.solution(outtimes, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("g")), + HS = HS.solution(outtimes, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("tb")), + SFORB = SFORB.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, "bound", sep="_")), + evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), + evalparse(paste("k", parent.name, "sink", sep="_"))) + ) + out <- cbind(outtimes, o) + dimnames(out) <- list(outtimes, c("time", sub("_free", "", parent.name))) + } + if (solution == "eigen") { + coefmat.num <- matrix(sapply(as.vector(mkinmod$coefmat), evalparse), + nrow = length(mod_vars)) + e <- eigen(coefmat.num) + c <- solve(e$vectors, odeini) + f.out <- function(t) { + e$vectors %*% diag(exp(e$values * t), nrow=length(mod_vars)) %*% c + } + o <- matrix(mapply(f.out, outtimes), + nrow = length(mod_vars), ncol = length(outtimes)) + dimnames(o) <- list(mod_vars, outtimes) + out <- cbind(time = outtimes, t(o)) + } + if (solution == "deSolve") + { + out <- ode( + y = odeini, + times = outtimes, + func = mkindiff, + parms = odeparms, + atol = atol + ) + } + + # Output transformation for models with unobserved compartments like SFORB + out_transformed <- data.frame(time = out[,"time"]) + for (var in names(mkinmod$map)) { + if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { + out_transformed[var] <- out[, var] + } else { + out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) + } + } + assign("out_predicted", out_transformed, inherits=TRUE) + + mC <- CakeCost(out_transformed, observed, y = "value", + err = err, weight = weight, scaleVar = scaleVar) + mC$penalties <- CakePenalties(odeparms, out_transformed, observed) + mC$model <- mC$cost + mC$penalties; + if (mC$model < cost.old) { + if (!quiet) + cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, + "\n") + + # Plot the data and current model output if requested + if(plot) { + outtimes_plot = seq(min(observed$time), max(observed$time), length.out=100) + if (solution == "analytical") { + o_plot <- switch(parent.type, + SFO = SFO.solution(outtimes_plot, + evalparse(parent.name), + evalparse(paste("k", parent.name, sep="_"))), +# evalparse(paste("k", parent.name, "sink", sep="_"))), + FOMC = FOMC.solution(outtimes_plot, + evalparse(parent.name), + evalparse("alpha"), evalparse("beta")), + DFOP = DFOP.solution(outtimes_plot, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("g")), + HS = HS.solution(outtimes_plot, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("tb")), + SFORB = SFORB.solution(outtimes_plot, + evalparse(parent.name), + evalparse(paste("k", parent.name, "bound", sep="_")), + evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), + evalparse(paste("k", parent.name, "sink", sep="_"))) + ) + out_plot <- cbind(outtimes_plot, o_plot) + dimnames(out_plot) <- list(outtimes_plot, c("time", sub("_free", "", parent.name))) + } + if(solution == "eigen") { + o_plot <- matrix(mapply(f.out, outtimes_plot), + nrow = length(mod_vars), ncol = length(outtimes_plot)) + dimnames(o_plot) <- list(mod_vars, outtimes_plot) + out_plot <- cbind(time = outtimes_plot, t(o_plot)) + } + if (solution == "deSolve") { + out_plot <- ode( + y = odeini, + times = outtimes_plot, + func = mkindiff, + parms = odeparms) + } + out_transformed_plot <- data.frame(time = out_plot[,"time"]) + for (var in names(mkinmod$map)) { + if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { + out_transformed_plot[var] <- out_plot[, var] + } else { + out_transformed_plot[var] <- rowSums(out_plot[, mkinmod$map[[var]]]) + } + } + out_transformed_plot <<- out_transformed_plot + + plot(0, type="n", + xlim = range(observed$time), ylim = range(observed$value, na.rm=TRUE), + xlab = "Time", ylab = "Observed") + col_obs <- pch_obs <- 1:length(obs_vars) + names(col_obs) <- names(pch_obs) <- obs_vars + for (obs_var in obs_vars) { + points(subset(observed, name == obs_var, c(time, value)), + pch = pch_obs[obs_var], col = col_obs[obs_var]) + } + matlines(out_transformed_plot$time, out_transformed_plot[-1]) + legend("topright", inset=c(0.05, 0.05), legend=obs_vars, + col=col_obs, pch=pch_obs, lty=1:length(pch_obs)) + } + + assign("cost.old", mC$model, inherits=TRUE) + } + # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost + mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res) + + return(mC) + } + fit <-modFit(cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, ...) + + # We need to return some more data for summary and plotting + fit$solution <- solution + if (solution == "eigen") { + fit$coefmat <- mkinmod$coefmat + } + if (solution == "deSolve") { + fit$mkindiff <- mkindiff + } + if (plot == TRUE) { + fit$out_transformed_plot = out_transformed_plot + } + + # We also need various other information for summary and plotting + fit$map <- mkinmod$map + fit$diffs <- mkinmod$diffs + + # mkin_long_to_wide does not handle ragged data +# fit$observed <- mkin_long_to_wide(observed) + fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") + names(fit$observed) <- c("time", as.vector(unique(observed$name))) + + predicted_long <- mkin_wide_to_long(out_predicted, time = "time") + fit$predicted <- out_predicted + + # Collect initial parameter values in two dataframes + fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) + fit$start$type = c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim))) + fit$start$lower <- lower + fit$start$upper <- upper + + fit$fixed <- data.frame( + value = c(state.ini.fixed, parms.fixed)) + fit$fixed$type = c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed))) + +# # Calculate chi2 error levels according to FOCUS (2006) + fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) + + # Calculate dissipation times DT50 and DT90 and formation fractions + parms.all = c(fit$par, parms.fixed) + fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), + row.names = obs_vars) + fit$ff <- vector() + fit$SFORB <- vector() + for (obs_var in obs_vars) { + type = names(mkinmod$map[[obs_var]])[1] + + fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all) + } + + fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed) + + # Collect observed, predicted and residuals + data<-observed + data$err<-rep(NA,length(data$time)) + data <- merge(data, predicted_long, by = c("time", "name")) + names(data)<-c("time", "variable", "observed","err-var", "predicted") + data$residual <- data$observed - data$predicted + data$variable <- ordered(data$variable, levels = obs_vars) + fit$data <- data[order(data$variable, data$time), ] + fit$atol <- atol + + class(fit) <- c("CakeFit", "mkinfit", "modFit") + return(fit) +} + -- cgit v1.2.1