diff options
Diffstat (limited to 'CakeOlsFit.R')
-rw-r--r-- | CakeOlsFit.R | 318 |
1 files changed, 77 insertions, 241 deletions
diff --git a/CakeOlsFit.R b/CakeOlsFit.R index 626a595..2b8e736 100644 --- a/CakeOlsFit.R +++ b/CakeOlsFit.R @@ -7,13 +7,9 @@ # from the FME package, v 1.1 by Soetart and Petzoldt, which was in turn # inspired by summary.nls.lm -#$Id$ -# This version has been modified to expect SFO parameterised as k and flow fractions -# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta -# Authors: Rob Nelson, Richard Smith, Tamar Christina -# Tessella Project Reference: 6245, 7247 +# Modifications developed by Tessella for Syngenta: Copyright (C) 2011-2016 Syngenta -# This program is free software: you can redistribute it and/or modify +# The CAKE R modules are free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. @@ -24,57 +20,69 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>.” - -CakeOlsFit <- function(mkinmod, observed, - parms.ini = rep(0.1, length(mkinmod$parms)), - state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), - lower = 0, upper = Inf, - fixed_parms = NULL, - fixed_initials = names(mkinmod$diffs)[-1], - eigen = TRUE, - plot = FALSE, quiet = FALSE, - err = NULL, weight = "none", scaleVar = FALSE, - atol = 1e-6, - sannMaxIter = 10000, - useSolnp = FALSE, method='L-BFGS-B', - ...) +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +# Performs an ordinary least squares fit on a given CAKE model. +# +# cake.model: The model to perform the fit on (as generated by CakeModel.R). +# observed: Observation data to fit to. +# parms.ini: Initial values for the parameters being fitted. +# state.ini: Initial state (i.e. initial values for concentration, the dependent variable being modelled). +# lower: Lower bounds to apply to parameters. +# upper: Upper bound to apply to parameters. +# fixed_parms: A vector of names of parameters that are fixed to their initial values. +# fixed_initials: A vector of compartments with fixed initial concentrations. +# quiet: Whether the internal cost functions should execute more quietly than normal (less output). +# atol: The tolerance to apply to the ODE solver. +# sannMaxIter: The maximum number of iterations to apply to SANN processes. +CakeOlsFit <- function(cake.model, + observed, + parms.ini = rep(0.1, length(cake.model$parms)), + state.ini = c(100, rep(0, length(cake.model$diffs) - 1)), + lower = 0, + upper = Inf, + fixed_parms = NULL, + fixed_initials = names(cake.model$diffs)[-1], + quiet = FALSE, + atol = 1e-6, + sannMaxIter = 10000, + ...) { - mod_vars <- names(mkinmod$diffs) + mod_vars <- names(cake.model$diffs) # Subset dataframe with mapped (modelled) variables - observed <- subset(observed, name %in% names(mkinmod$map)) + observed <- subset(observed, name %in% names(cake.model$map)) # Get names of observed variables obs_vars = unique(as.character(observed$name)) - + # Name the parameters if they are not named yet - if(is.null(names(parms.ini))) names(parms.ini) <- mkinmod$parms - + if(is.null(names(parms.ini))) names(parms.ini) <- cake.model$parms + # Name the inital parameter values if they are not named yet if(is.null(names(state.ini))) names(state.ini) <- mod_vars - + # Parameters to be optimised parms.fixed <- parms.ini[fixed_parms] optim_parms <- setdiff(names(parms.ini), fixed_parms) parms.optim <- parms.ini[optim_parms] - + state.ini.fixed <- state.ini[fixed_initials] optim_initials <- setdiff(names(state.ini), fixed_initials) state.ini.optim <- state.ini[optim_initials] state.ini.optim.boxnames <- names(state.ini.optim) if(length(state.ini.optim) > 0) { - names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") + names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") } - + # Decide if the solution of the model can be based on a simple analytical # formula, the spectral decomposition of the matrix (fundamental system) # or a numeric ode solver from the deSolve package - if (length(mkinmod$map) == 1) { + if (length(cake.model$map) == 1) { solution = "analytical" } else { - if (is.matrix(mkinmod$coefmat) & eigen) solution = "eigen" - else solution = "deSolve" + solution = "deSolve" } - + # Create a function calculating the differentials specified by the model # if necessary if(solution == "deSolve") { @@ -85,234 +93,63 @@ CakeOlsFit <- function(mkinmod, observed, { diffname <- paste("d", box, sep="_") diffs[diffname] <- with(as.list(c(time,state, parms)), - eval(parse(text=mkinmod$diffs[[box]]))) + eval(parse(text=cake.model$diffs[[box]]))) } return(list(c(diffs))) } } - - cost.old <- 1e100 - calls <- 0 - out_predicted <- NA - # Define the model cost function - cost <- function(P) - { - assign("calls", calls+1, inherits=TRUE) - if(length(state.ini.optim) > 0) { - odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) - names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) - } else odeini <- state.ini.fixed - - odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed) - - # Ensure initial state is at time 0 - outtimes = unique(c(0,observed$time)) - evalparse <- function(string) - { - eval(parse(text=string), as.list(c(odeparms, odeini))) - } - - # Solve the system - if (solution == "analytical") { - parent.type = names(mkinmod$map[[1]])[1] - parent.name = names(mkinmod$diffs)[[1]] - o <- switch(parent.type, - SFO = SFO.solution(outtimes, - evalparse(parent.name), - evalparse(paste("k", parent.name, sep="_"))), -# evalparse("k")), -# evalparse(paste("k", parent.name, "sink", sep="_"))), - FOMC = FOMC.solution(outtimes, - evalparse(parent.name), - evalparse("alpha"), evalparse("beta")), - DFOP = DFOP.solution(outtimes, - evalparse(parent.name), - evalparse("k1"), evalparse("k2"), - evalparse("g")), - HS = HS.solution(outtimes, - evalparse(parent.name), - evalparse("k1"), evalparse("k2"), - evalparse("tb")), - SFORB = SFORB.solution(outtimes, - evalparse(parent.name), - evalparse(paste("k", parent.name, "bound", sep="_")), - evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), - evalparse(paste("k", parent.name, "sink", sep="_"))) - ) - out <- cbind(outtimes, o) - dimnames(out) <- list(outtimes, c("time", sub("_free", "", parent.name))) - } - if (solution == "eigen") { - coefmat.num <- matrix(sapply(as.vector(mkinmod$coefmat), evalparse), - nrow = length(mod_vars)) - e <- eigen(coefmat.num) - c <- solve(e$vectors, odeini) - f.out <- function(t) { - e$vectors %*% diag(exp(e$values * t), nrow=length(mod_vars)) %*% c - } - o <- matrix(mapply(f.out, outtimes), - nrow = length(mod_vars), ncol = length(outtimes)) - dimnames(o) <- list(mod_vars, outtimes) - out <- cbind(time = outtimes, t(o)) - } - if (solution == "deSolve") - { - out <- ode( - y = odeini, - times = outtimes, - func = mkindiff, - parms = odeparms, - atol = atol - ) - } - # Output transformation for models with unobserved compartments like SFORB - out_transformed <- data.frame(time = out[,"time"]) - for (var in names(mkinmod$map)) { - if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { - out_transformed[var] <- out[, var] - } else { - out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) - } - } - assign("out_predicted", out_transformed, inherits=TRUE) - - mC <- CakeCost(out_transformed, observed, y = "value", - err = err, weight = weight, scaleVar = scaleVar) - mC$penalties <- CakePenalties(odeparms, out_transformed, observed) - mC$model <- mC$cost + mC$penalties; - if (mC$model < cost.old) { - if (!quiet) - cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, - "\n") - - # Plot the data and current model output if requested - if(plot) { - outtimes_plot = seq(min(observed$time), max(observed$time), length.out=100) - if (solution == "analytical") { - o_plot <- switch(parent.type, - SFO = SFO.solution(outtimes_plot, - evalparse(parent.name), - evalparse(paste("k", parent.name, sep="_"))), -# evalparse(paste("k", parent.name, "sink", sep="_"))), - FOMC = FOMC.solution(outtimes_plot, - evalparse(parent.name), - evalparse("alpha"), evalparse("beta")), - DFOP = DFOP.solution(outtimes_plot, - evalparse(parent.name), - evalparse("k1"), evalparse("k2"), - evalparse("g")), - HS = HS.solution(outtimes_plot, - evalparse(parent.name), - evalparse("k1"), evalparse("k2"), - evalparse("tb")), - SFORB = SFORB.solution(outtimes_plot, - evalparse(parent.name), - evalparse(paste("k", parent.name, "bound", sep="_")), - evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), - evalparse(paste("k", parent.name, "sink", sep="_"))) - ) - out_plot <- cbind(outtimes_plot, o_plot) - dimnames(out_plot) <- list(outtimes_plot, c("time", sub("_free", "", parent.name))) - } - if(solution == "eigen") { - o_plot <- matrix(mapply(f.out, outtimes_plot), - nrow = length(mod_vars), ncol = length(outtimes_plot)) - dimnames(o_plot) <- list(mod_vars, outtimes_plot) - out_plot <- cbind(time = outtimes_plot, t(o_plot)) - } - if (solution == "deSolve") { - out_plot <- ode( - y = odeini, - times = outtimes_plot, - func = mkindiff, - parms = odeparms) - } - out_transformed_plot <- data.frame(time = out_plot[,"time"]) - for (var in names(mkinmod$map)) { - if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { - out_transformed_plot[var] <- out_plot[, var] - } else { - out_transformed_plot[var] <- rowSums(out_plot[, mkinmod$map[[var]]]) - } - } - out_transformed_plot <<- out_transformed_plot - - plot(0, type="n", - xlim = range(observed$time), ylim = range(observed$value, na.rm=TRUE), - xlab = "Time", ylab = "Observed") - col_obs <- pch_obs <- 1:length(obs_vars) - names(col_obs) <- names(pch_obs) <- obs_vars - for (obs_var in obs_vars) { - points(subset(observed, name == obs_var, c(time, value)), - pch = pch_obs[obs_var], col = col_obs[obs_var]) - } - matlines(out_transformed_plot$time, out_transformed_plot[-1]) - legend("topright", inset=c(0.05, 0.05), legend=obs_vars, - col=col_obs, pch=pch_obs, lty=1:length(pch_obs)) - } - - assign("cost.old", mC$model, inherits=TRUE) - } - # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost - mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res) - - return(mC) - } - fit <-modFit(cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, ...) - + costFunctions <- CakeInternalCostFunctions(cake.model, state.ini.optim, state.ini.optim.boxnames, state.ini.fixed, + parms.fixed, observed, mkindiff = mkindiff, quiet, atol=atol, solution=solution, err=NULL) + + # modFit parameter transformations can explode if you put in parameters that are equal to a bound, so we move them away by a tiny amount. + all.optim <- ShiftAwayFromBoundaries(c(state.ini.optim, parms.optim), lower, upper) + + fit <-modFit(costFunctions$cost, all.optim, lower = lower, upper = upper, ...) + # We need to return some more data for summary and plotting fit$solution <- solution - if (solution == "eigen") { - fit$coefmat <- mkinmod$coefmat - } + if (solution == "deSolve") { fit$mkindiff <- mkindiff } - if (plot == TRUE) { - fit$out_transformed_plot = out_transformed_plot - } - + # We also need various other information for summary and plotting - fit$map <- mkinmod$map - fit$diffs <- mkinmod$diffs + fit$map <- cake.model$map + fit$diffs <- cake.model$diffs - # mkin_long_to_wide does not handle ragged data -# fit$observed <- mkin_long_to_wide(observed) - fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") - names(fit$observed) <- c("time", as.vector(unique(observed$name))) + out_predicted <- costFunctions$get.predicted() - predicted_long <- mkin_wide_to_long(out_predicted, time = "time") + predicted_long <- wide_to_long(out_predicted, time = "time") fit$predicted <- out_predicted - + # Collect initial parameter values in two dataframes fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) fit$start$type = c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim))) fit$start$lower <- lower fit$start$upper <- upper - - fit$fixed <- data.frame( - value = c(state.ini.fixed, parms.fixed)) + + fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) fit$fixed$type = c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed))) - -# # Calculate chi2 error levels according to FOCUS (2006) - fit$errmin <- CakeChi2(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini) - + + # Calculate chi2 error levels according to FOCUS (2006) + fit$errmin <- CakeChi2(cake.model, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fit$fixed) + # Calculate dissipation times DT50 and DT90 and formation fractions parms.all = c(fit$par, parms.fixed) fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), - row.names = obs_vars) - fit$extraDT50<- data.frame(DT50 = rep(NA, 2), row.names = c("k1", "k2")) - - fit$ff <- vector() - fit$SFORB <- vector() - for (obs_var in obs_vars) { - type = names(mkinmod$map[[obs_var]])[1] - - fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all,sannMaxIter) + row.names = obs_vars) + fit$extraDT50<- data.frame(k1 = rep(NA, length(names(cake.model$map))), k2 = rep(NA, length(names(cake.model$map))), row.names = names(cake.model$map)) + + for (compartment.name in names(cake.model$map)) { + type = names(cake.model$map[[compartment.name]])[1] + + fit$distimes[compartment.name, ] = CakeDT(type,compartment.name,parms.all,sannMaxIter) + fit$extraDT50[compartment.name, ] = CakeExtraDT(type, compartment.name, parms.all) } - - fit$extraDT50[ ,c("DT50")] = CakeExtraDT(type,parms.all) + + fit$ioreRepDT = CakeIORERepresentativeDT("Parent", parms.all) + fit$fomcRepDT = CakeFOMCBackCalculatedDT(parms.all) fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed) # Collect observed, predicted and residuals @@ -324,8 +161,7 @@ CakeOlsFit <- function(mkinmod, observed, data$variable <- ordered(data$variable, levels = obs_vars) fit$data <- data[order(data$variable, data$time), ] fit$atol <- atol - + class(fit) <- c("CakeFit", "mkinfit", "modFit") return(fit) -} - +}
\ No newline at end of file |