# Originally: mkinfit.R 87 2010-12-09 07:31:59Z jranke $
# Based on code in mkinfit
# Portions Johannes Ranke 2010
# Contact: mkin-devel@lists.berlios.de
# The summary function is an adapted and extended version of summary.modFit
# from the FME package, v 1.1 by Soetart and Petzoldt, which was in turn
# inspired by summary.nls.lm
#$Id$
# This version has been modified to expect SFO parameterised as k and flow fractions
# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta
# Authors: Rob Nelson, Richard Smith, Tamar Christina
# Tessella Project Reference: 6245, 7247
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.”
CakeOlsFit <- function(mkinmod, observed,
parms.ini = rep(0.1, length(mkinmod$parms)),
state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)),
lower = 0, upper = Inf,
fixed_parms = NULL,
fixed_initials = names(mkinmod$diffs)[-1],
eigen = TRUE,
plot = FALSE, quiet = FALSE,
err = NULL, weight = "none", scaleVar = FALSE,
atol = 1e-6,
sannMaxIter = 10000,
useSolnp = FALSE, method='L-BFGS-B',
...)
{
mod_vars <- names(mkinmod$diffs)
# Subset dataframe with mapped (modelled) variables
observed <- subset(observed, name %in% names(mkinmod$map))
# Get names of observed variables
obs_vars = unique(as.character(observed$name))
# Name the parameters if they are not named yet
if(is.null(names(parms.ini))) names(parms.ini) <- mkinmod$parms
# Name the inital parameter values if they are not named yet
if(is.null(names(state.ini))) names(state.ini) <- mod_vars
# Parameters to be optimised
parms.fixed <- parms.ini[fixed_parms]
optim_parms <- setdiff(names(parms.ini), fixed_parms)
parms.optim <- parms.ini[optim_parms]
state.ini.fixed <- state.ini[fixed_initials]
optim_initials <- setdiff(names(state.ini), fixed_initials)
state.ini.optim <- state.ini[optim_initials]
state.ini.optim.boxnames <- names(state.ini.optim)
if(length(state.ini.optim) > 0) {
names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_")
}
# Decide if the solution of the model can be based on a simple analytical
# formula, the spectral decomposition of the matrix (fundamental system)
# or a numeric ode solver from the deSolve package
if (length(mkinmod$map) == 1) {
solution = "analytical"
} else {
if (is.matrix(mkinmod$coefmat) & eigen) solution = "eigen"
else solution = "deSolve"
}
# Create a function calculating the differentials specified by the model
# if necessary
if(solution == "deSolve") {
mkindiff <- function(t, state, parms) {
time <- t
diffs <- vector()
for (box in mod_vars)
{
diffname <- paste("d", box, sep="_")
diffs[diffname] <- with(as.list(c(time,state, parms)),
eval(parse(text=mkinmod$diffs[[box]])))
}
return(list(c(diffs)))
}
}
cost.old <- 1e100
calls <- 0
out_predicted <- NA
# Define the model cost function
cost <- function(P)
{
assign("calls", calls+1, inherits=TRUE)
if(length(state.ini.optim) > 0) {
odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed)
names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed))
} else odeini <- state.ini.fixed
odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed)
# Ensure initial state is at time 0
outtimes = unique(c(0,observed$time))
evalparse <- function(string)
{
eval(parse(text=string), as.list(c(odeparms, odeini)))
}
# Solve the system
if (solution == "analytical") {
parent.type = names(mkinmod$map[[1]])[1]
parent.name = names(mkinmod$diffs)[[1]]
o <- switch(parent.type,
SFO = SFO.solution(outtimes,
evalparse(parent.name),
evalparse(paste("k", parent.name, sep="_"))),
# evalparse("k")),
# evalparse(paste("k", parent.name, "sink", sep="_"))),
FOMC = FOMC.solution(outtimes,
evalparse(parent.name),
evalparse("alpha"), evalparse("beta")),
DFOP = DFOP.solution(outtimes,
evalparse(parent.name),
evalparse("k1"), evalparse("k2"),
evalparse("g")),
HS = HS.solution(outtimes,
evalparse(parent.name),
evalparse("k1"), evalparse("k2"),
evalparse("tb")),
SFORB = SFORB.solution(outtimes,
evalparse(parent.name),
evalparse(paste("k", parent.name, "bound", sep="_")),
evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")),
evalparse(paste("k", parent.name, "sink", sep="_")))
)
out <- cbind(outtimes, o)
dimnames(out) <- list(outtimes, c("time", sub("_free", "", parent.name)))
}
if (solution == "eigen") {
coefmat.num <- matrix(sapply(as.vector(mkinmod$coefmat), evalparse),
nrow = length(mod_vars))
e <- eigen(coefmat.num)
c <- solve(e$vectors, odeini)
f.out <- function(t) {
e$vectors %*% diag(exp(e$values * t), nrow=length(mod_vars)) %*% c
}
o <- matrix(mapply(f.out, outtimes),
nrow = length(mod_vars), ncol = length(outtimes))
dimnames(o) <- list(mod_vars, outtimes)
out <- cbind(time = outtimes, t(o))
}
if (solution == "deSolve")
{
out <- ode(
y = odeini,
times = outtimes,
func = mkindiff,
parms = odeparms,
atol = atol
)
}
# Output transformation for models with unobserved compartments like SFORB
out_transformed <- data.frame(time = out[,"time"])
for (var in names(mkinmod$map)) {
if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") {
out_transformed[var] <- out[, var]
} else {
out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]])
}
}
assign("out_predicted", out_transformed, inherits=TRUE)
mC <- CakeCost(out_transformed, observed, y = "value",
err = err, weight = weight, scaleVar = scaleVar)
mC$penalties <- CakePenalties(odeparms, out_transformed, observed)
mC$model <- mC$cost + mC$penalties;
if (mC$model < cost.old) {
if (!quiet)
cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model,
"\n")
# Plot the data and current model output if requested
if(plot) {
outtimes_plot = seq(min(observed$time), max(observed$time), length.out=100)
if (solution == "analytical") {
o_plot <- switch(parent.type,
SFO = SFO.solution(outtimes_plot,
evalparse(parent.name),
evalparse(paste("k", parent.name, sep="_"))),
# evalparse(paste("k", parent.name, "sink", sep="_"))),
FOMC = FOMC.solution(outtimes_plot,
evalparse(parent.name),
evalparse("alpha"), evalparse("beta")),
DFOP = DFOP.solution(outtimes_plot,
evalparse(parent.name),
evalparse("k1"), evalparse("k2"),
evalparse("g")),
HS = HS.solution(outtimes_plot,
evalparse(parent.name),
evalparse("k1"), evalparse("k2"),
evalparse("tb")),
SFORB = SFORB.solution(outtimes_plot,
evalparse(parent.name),
evalparse(paste("k", parent.name, "bound", sep="_")),
evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")),
evalparse(paste("k", parent.name, "sink", sep="_")))
)
out_plot <- cbind(outtimes_plot, o_plot)
dimnames(out_plot) <- list(outtimes_plot, c("time", sub("_free", "", parent.name)))
}
if(solution == "eigen") {
o_plot <- matrix(mapply(f.out, outtimes_plot),
nrow = length(mod_vars), ncol = length(outtimes_plot))
dimnames(o_plot) <- list(mod_vars, outtimes_plot)
out_plot <- cbind(time = outtimes_plot, t(o_plot))
}
if (solution == "deSolve") {
out_plot <- ode(
y = odeini,
times = outtimes_plot,
func = mkindiff,
parms = odeparms)
}
out_transformed_plot <- data.frame(time = out_plot[,"time"])
for (var in names(mkinmod$map)) {
if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") {
out_transformed_plot[var] <- out_plot[, var]
} else {
out_transformed_plot[var] <- rowSums(out_plot[, mkinmod$map[[var]]])
}
}
out_transformed_plot <<- out_transformed_plot
plot(0, type="n",
xlim = range(observed$time), ylim = range(observed$value, na.rm=TRUE),
xlab = "Time", ylab = "Observed")
col_obs <- pch_obs <- 1:length(obs_vars)
names(col_obs) <- names(pch_obs) <- obs_vars
for (obs_var in obs_vars) {
points(subset(observed, name == obs_var, c(time, value)),
pch = pch_obs[obs_var], col = col_obs[obs_var])
}
matlines(out_transformed_plot$time, out_transformed_plot[-1])
legend("topright", inset=c(0.05, 0.05), legend=obs_vars,
col=col_obs, pch=pch_obs, lty=1:length(pch_obs))
}
assign("cost.old", mC$model, inherits=TRUE)
}
# HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost
mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res)
return(mC)
}
fit <-modFit(cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, ...)
# We need to return some more data for summary and plotting
fit$solution <- solution
if (solution == "eigen") {
fit$coefmat <- mkinmod$coefmat
}
if (solution == "deSolve") {
fit$mkindiff <- mkindiff
}
if (plot == TRUE) {
fit$out_transformed_plot = out_transformed_plot
}
# We also need various other information for summary and plotting
fit$map <- mkinmod$map
fit$diffs <- mkinmod$diffs
# mkin_long_to_wide does not handle ragged data
# fit$observed <- mkin_long_to_wide(observed)
fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time")
names(fit$observed) <- c("time", as.vector(unique(observed$name)))
predicted_long <- mkin_wide_to_long(out_predicted, time = "time")
fit$predicted <- out_predicted
# Collect initial parameter values in two dataframes
fit$start <- data.frame(initial = c(state.ini.optim, parms.optim))
fit$start$type = c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim)))
fit$start$lower <- lower
fit$start$upper <- upper
fit$fixed <- data.frame(
value = c(state.ini.fixed, parms.fixed))
fit$fixed$type = c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed)))
# # Calculate chi2 error levels according to FOCUS (2006)
fit$errmin <- CakeChi2(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini)
# Calculate dissipation times DT50 and DT90 and formation fractions
parms.all = c(fit$par, parms.fixed)
fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)),
row.names = obs_vars)
fit$extraDT50<- data.frame(DT50 = rep(NA, 2), row.names = c("k1", "k2"))
fit$ff <- vector()
fit$SFORB <- vector()
for (obs_var in obs_vars) {
type = names(mkinmod$map[[obs_var]])[1]
fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all,sannMaxIter)
}
fit$extraDT50[ ,c("DT50")] = CakeExtraDT(type,parms.all)
fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed)
# Collect observed, predicted and residuals
data<-observed
data$err<-rep(NA,length(data$time))
data <- merge(data, predicted_long, by = c("time", "name"))
names(data)<-c("time", "variable", "observed","err-var", "predicted")
data$residual <- data$observed - data$predicted
data$variable <- ordered(data$variable, levels = obs_vars)
fit$data <- data[order(data$variable, data$time), ]
fit$atol <- atol
class(fit) <- c("CakeFit", "mkinfit", "modFit")
return(fit)
}