# Originally: mkinfit.R 87 2010-12-09 07:31:59Z jranke $ # Based on code in mkinfit # Portions Johannes Ranke 2010 # Contact: mkin-devel@lists.berlios.de # The summary function is an adapted and extended version of summary.modFit # from the FME package, v 1.1 by Soetart and Petzoldt, which was in turn # inspired by summary.nls.lm # Modifications developed by Tessella for Syngenta: Copyright (C) 2011-2016 Syngenta # The CAKE R modules are free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # Performs an ordinary least squares fit on a given CAKE model. # # cake.model: The model to perform the fit on (as generated by CakeModel.R). # observed: Observation data to fit to. # parms.ini: Initial values for the parameters being fitted. # state.ini: Initial state (i.e. initial values for concentration, the dependent variable being modelled). # lower: Lower bounds to apply to parameters. # upper: Upper bound to apply to parameters. # fixed_parms: A vector of names of parameters that are fixed to their initial values. # fixed_initials: A vector of compartments with fixed initial concentrations. # quiet: Whether the internal cost functions should execute more quietly than normal (less output). # atol: The tolerance to apply to the ODE solver. # sannMaxIter: The maximum number of iterations to apply to SANN processes. CakeOlsFit <- function(cake.model, observed, parms.ini = rep(0.1, length(cake.model$parms)), state.ini = c(100, rep(0, length(cake.model$diffs) - 1)), lower = 0, upper = Inf, fixed_parms = NULL, fixed_initials = names(cake.model$diffs)[-1], quiet = FALSE, atol = 1e-6, sannMaxIter = 10000, ...) { mod_vars <- names(cake.model$diffs) # Subset dataframe with mapped (modelled) variables observed <- subset(observed, name %in% names(cake.model$map)) # Get names of observed variables obs_vars = unique(as.character(observed$name)) # Name the parameters if they are not named yet if(is.null(names(parms.ini))) names(parms.ini) <- cake.model$parms # Name the inital parameter values if they are not named yet if(is.null(names(state.ini))) names(state.ini) <- mod_vars # Parameters to be optimised parms.fixed <- parms.ini[fixed_parms] optim_parms <- setdiff(names(parms.ini), fixed_parms) parms.optim <- parms.ini[optim_parms] state.ini.fixed <- state.ini[fixed_initials] optim_initials <- setdiff(names(state.ini), fixed_initials) state.ini.optim <- state.ini[optim_initials] state.ini.optim.boxnames <- names(state.ini.optim) if(length(state.ini.optim) > 0) { names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") } # Decide if the solution of the model can be based on a simple analytical # formula, the spectral decomposition of the matrix (fundamental system) # or a numeric ode solver from the deSolve package if (length(cake.model$map) == 1) { solution = "analytical" } else { solution = "deSolve" } # Create a function calculating the differentials specified by the model # if necessary if(solution == "deSolve") { mkindiff <- function(t, state, parms) { time <- t diffs <- vector() for (box in mod_vars) { diffname <- paste("d", box, sep="_") diffs[diffname] <- with(as.list(c(time,state, parms)), eval(parse(text=cake.model$diffs[[box]]))) } return(list(c(diffs))) } } costFunctions <- CakeInternalCostFunctions(cake.model, state.ini.optim, state.ini.optim.boxnames, state.ini.fixed, parms.fixed, observed, mkindiff = mkindiff, quiet, atol=atol, solution=solution, err=NULL) # modFit parameter transformations can explode if you put in parameters that are equal to a bound, so we move them away by a tiny amount. all.optim <- ShiftAwayFromBoundaries(c(state.ini.optim, parms.optim), lower, upper) fit <-modFit(costFunctions$cost, all.optim, lower = lower, upper = upper, ...) # We need to return some more data for summary and plotting fit$solution <- solution if (solution == "deSolve") { fit$mkindiff <- mkindiff } # We also need various other information for summary and plotting fit$map <- cake.model$map fit$diffs <- cake.model$diffs out_predicted <- costFunctions$get.predicted() predicted_long <- wide_to_long(out_predicted, time = "time") fit$predicted <- out_predicted # Collect initial parameter values in two dataframes fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) fit$start$type = c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim))) fit$start$lower <- lower fit$start$upper <- upper fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) fit$fixed$type = c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed))) # Calculate chi2 error levels according to FOCUS (2006) fit$errmin <- CakeChi2(cake.model, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fit$fixed) # Calculate dissipation times DT50 and DT90 and formation fractions parms.all = c(fit$par, parms.fixed) fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) fit$extraDT50<- data.frame(k1 = rep(NA, length(names(cake.model$map))), k2 = rep(NA, length(names(cake.model$map))), row.names = names(cake.model$map)) for (compartment.name in names(cake.model$map)) { type = names(cake.model$map[[compartment.name]])[1] fit$distimes[compartment.name, ] = CakeDT(type,compartment.name,parms.all,sannMaxIter) fit$extraDT50[compartment.name, ] = CakeExtraDT(type, compartment.name, parms.all) } fit$ioreRepDT = CakeIORERepresentativeDT("Parent", parms.all) fit$fomcRepDT = CakeFOMCBackCalculatedDT(parms.all) fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed) # Collect observed, predicted and residuals data<-observed data$err<-rep(NA,length(data$time)) data <- merge(data, predicted_long, by = c("time", "name")) names(data)<-c("time", "variable", "observed","err-var", "predicted") data$residual <- data$observed - data$predicted data$variable <- ordered(data$variable, levels = obs_vars) fit$data <- data[order(data$variable, data$time), ] fit$atol <- atol class(fit) <- c("CakeFit", "mkinfit", "modFit") return(fit) }