# Originally: mkinfit.R 87 2010-12-09 07:31:59Z jranke $ # Based on code in mkinfit # Portions Johannes Ranke 2010 # Contact: mkin-devel@lists.berlios.de # The summary function is an adapted and extended version of summary.modFit # from the FME package, v 1.1 by Soetart and Petzoldt, which was in turn # inspired by summary.nls.lm #$Id: CakeOlsFit.R 216 2011-07-05 14:35:03Z nelr $ # This version has been modified to expect SFO parameterised as k and flow fractions # Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta # Authors: Rob Nelson, Richard Smith # Tessella Project Reference: 6245 # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see .” CakeOlsFit <- function(mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)), state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0, upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1], eigen = TRUE, plot = FALSE, quiet = FALSE, err = NULL, weight = "none", scaleVar = FALSE, atol = 1e-6, ...) { mod_vars <- names(mkinmod$diffs) # Subset dataframe with mapped (modelled) variables observed <- subset(observed, name %in% names(mkinmod$map)) # Get names of observed variables obs_vars = unique(as.character(observed$name)) # Name the parameters if they are not named yet if(is.null(names(parms.ini))) names(parms.ini) <- mkinmod$parms # Name the inital parameter values if they are not named yet if(is.null(names(state.ini))) names(state.ini) <- mod_vars # Parameters to be optimised parms.fixed <- parms.ini[fixed_parms] optim_parms <- setdiff(names(parms.ini), fixed_parms) parms.optim <- parms.ini[optim_parms] state.ini.fixed <- state.ini[fixed_initials] optim_initials <- setdiff(names(state.ini), fixed_initials) state.ini.optim <- state.ini[optim_initials] state.ini.optim.boxnames <- names(state.ini.optim) if(length(state.ini.optim) > 0) { names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") } # Decide if the solution of the model can be based on a simple analytical # formula, the spectral decomposition of the matrix (fundamental system) # or a numeric ode solver from the deSolve package if (length(mkinmod$map) == 1) { solution = "analytical" } else { if (is.matrix(mkinmod$coefmat) & eigen) solution = "eigen" else solution = "deSolve" } # Create a function calculating the differentials specified by the model # if necessary if(solution == "deSolve") { mkindiff <- function(t, state, parms) { time <- t diffs <- vector() for (box in mod_vars) { diffname <- paste("d", box, sep="_") diffs[diffname] <- with(as.list(c(time,state, parms)), eval(parse(text=mkinmod$diffs[[box]]))) } return(list(c(diffs))) } } cost.old <- 1e100 calls <- 0 out_predicted <- NA # Define the model cost function cost <- function(P) { assign("calls", calls+1, inherits=TRUE) if(length(state.ini.optim) > 0) { odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) } else odeini <- state.ini.fixed odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed) # Ensure initial state is at time 0 outtimes = unique(c(0,observed$time)) evalparse <- function(string) { eval(parse(text=string), as.list(c(odeparms, odeini))) } # Solve the system if (solution == "analytical") { parent.type = names(mkinmod$map[[1]])[1] parent.name = names(mkinmod$diffs)[[1]] o <- switch(parent.type, SFO = SFO.solution(outtimes, evalparse(parent.name), evalparse(paste("k", parent.name, sep="_"))), # evalparse("k")), # evalparse(paste("k", parent.name, "sink", sep="_"))), FOMC = FOMC.solution(outtimes, evalparse(parent.name), evalparse("alpha"), evalparse("beta")), DFOP = DFOP.solution(outtimes, evalparse(parent.name), evalparse("k1"), evalparse("k2"), evalparse("g")), HS = HS.solution(outtimes, evalparse(parent.name), evalparse("k1"), evalparse("k2"), evalparse("tb")), SFORB = SFORB.solution(outtimes, evalparse(parent.name), evalparse(paste("k", parent.name, "bound", sep="_")), evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), evalparse(paste("k", parent.name, "sink", sep="_"))) ) out <- cbind(outtimes, o) dimnames(out) <- list(outtimes, c("time", sub("_free", "", parent.name))) } if (solution == "eigen") { coefmat.num <- matrix(sapply(as.vector(mkinmod$coefmat), evalparse), nrow = length(mod_vars)) e <- eigen(coefmat.num) c <- solve(e$vectors, odeini) f.out <- function(t) { e$vectors %*% diag(exp(e$values * t), nrow=length(mod_vars)) %*% c } o <- matrix(mapply(f.out, outtimes), nrow = length(mod_vars), ncol = length(outtimes)) dimnames(o) <- list(mod_vars, outtimes) out <- cbind(time = outtimes, t(o)) } if (solution == "deSolve") { out <- ode( y = odeini, times = outtimes, func = mkindiff, parms = odeparms, atol = atol ) } # Output transformation for models with unobserved compartments like SFORB out_transformed <- data.frame(time = out[,"time"]) for (var in names(mkinmod$map)) { if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { out_transformed[var] <- out[, var] } else { out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) } } assign("out_predicted", out_transformed, inherits=TRUE) mC <- CakeCost(out_transformed, observed, y = "value", err = err, weight = weight, scaleVar = scaleVar) mC$penalties <- CakePenalties(odeparms, out_transformed, observed) mC$model <- mC$cost + mC$penalties; if (mC$model < cost.old) { if (!quiet) cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, "\n") # Plot the data and current model output if requested if(plot) { outtimes_plot = seq(min(observed$time), max(observed$time), length.out=100) if (solution == "analytical") { o_plot <- switch(parent.type, SFO = SFO.solution(outtimes_plot, evalparse(parent.name), evalparse(paste("k", parent.name, sep="_"))), # evalparse(paste("k", parent.name, "sink", sep="_"))), FOMC = FOMC.solution(outtimes_plot, evalparse(parent.name), evalparse("alpha"), evalparse("beta")), DFOP = DFOP.solution(outtimes_plot, evalparse(parent.name), evalparse("k1"), evalparse("k2"), evalparse("g")), HS = HS.solution(outtimes_plot, evalparse(parent.name), evalparse("k1"), evalparse("k2"), evalparse("tb")), SFORB = SFORB.solution(outtimes_plot, evalparse(parent.name), evalparse(paste("k", parent.name, "bound", sep="_")), evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), evalparse(paste("k", parent.name, "sink", sep="_"))) ) out_plot <- cbind(outtimes_plot, o_plot) dimnames(out_plot) <- list(outtimes_plot, c("time", sub("_free", "", parent.name))) } if(solution == "eigen") { o_plot <- matrix(mapply(f.out, outtimes_plot), nrow = length(mod_vars), ncol = length(outtimes_plot)) dimnames(o_plot) <- list(mod_vars, outtimes_plot) out_plot <- cbind(time = outtimes_plot, t(o_plot)) } if (solution == "deSolve") { out_plot <- ode( y = odeini, times = outtimes_plot, func = mkindiff, parms = odeparms) } out_transformed_plot <- data.frame(time = out_plot[,"time"]) for (var in names(mkinmod$map)) { if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { out_transformed_plot[var] <- out_plot[, var] } else { out_transformed_plot[var] <- rowSums(out_plot[, mkinmod$map[[var]]]) } } out_transformed_plot <<- out_transformed_plot plot(0, type="n", xlim = range(observed$time), ylim = range(observed$value, na.rm=TRUE), xlab = "Time", ylab = "Observed") col_obs <- pch_obs <- 1:length(obs_vars) names(col_obs) <- names(pch_obs) <- obs_vars for (obs_var in obs_vars) { points(subset(observed, name == obs_var, c(time, value)), pch = pch_obs[obs_var], col = col_obs[obs_var]) } matlines(out_transformed_plot$time, out_transformed_plot[-1]) legend("topright", inset=c(0.05, 0.05), legend=obs_vars, col=col_obs, pch=pch_obs, lty=1:length(pch_obs)) } assign("cost.old", mC$model, inherits=TRUE) } # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res) return(mC) } fit <-modFit(cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, ...) # We need to return some more data for summary and plotting fit$solution <- solution if (solution == "eigen") { fit$coefmat <- mkinmod$coefmat } if (solution == "deSolve") { fit$mkindiff <- mkindiff } if (plot == TRUE) { fit$out_transformed_plot = out_transformed_plot } # We also need various other information for summary and plotting fit$map <- mkinmod$map fit$diffs <- mkinmod$diffs # mkin_long_to_wide does not handle ragged data # fit$observed <- mkin_long_to_wide(observed) fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") names(fit$observed) <- c("time", as.vector(unique(observed$name))) predicted_long <- mkin_wide_to_long(out_predicted, time = "time") fit$predicted <- out_predicted # Collect initial parameter values in two dataframes fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) fit$start$type = c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim))) fit$start$lower <- lower fit$start$upper <- upper fit$fixed <- data.frame( value = c(state.ini.fixed, parms.fixed)) fit$fixed$type = c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed))) # # Calculate chi2 error levels according to FOCUS (2006) fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) # Calculate dissipation times DT50 and DT90 and formation fractions parms.all = c(fit$par, parms.fixed) fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) fit$ff <- vector() fit$SFORB <- vector() for (obs_var in obs_vars) { type = names(mkinmod$map[[obs_var]])[1] fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all) } fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed) # Collect observed, predicted and residuals data<-observed data$err<-rep(NA,length(data$time)) data <- merge(data, predicted_long, by = c("time", "name")) names(data)<-c("time", "variable", "observed","err-var", "predicted") data$residual <- data$observed - data$predicted data$variable <- ordered(data$variable, levels = obs_vars) fit$data <- data[order(data$variable, data$time), ] fit$atol <- atol class(fit) <- c("CakeFit", "mkinfit", "modFit") return(fit) }