summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CakeCost.R348
-rw-r--r--CakeInit.R39
-rw-r--r--CakeIrlsFit.R147
-rw-r--r--CakeIrlsPlot.R25
-rw-r--r--CakeMcmcFit.R292
-rw-r--r--CakeModel.R261
-rw-r--r--CakeNullPlot.R127
-rw-r--r--CakeOlsFit.R326
-rw-r--r--CakeOlsPlot.R117
-rw-r--r--CakePenalties.R56
-rw-r--r--CakeSummary.R301
11 files changed, 2039 insertions, 0 deletions
diff --git a/CakeCost.R b/CakeCost.R
new file mode 100644
index 0000000..1cc3981
--- /dev/null
+++ b/CakeCost.R
@@ -0,0 +1,348 @@
+## -----------------------------------------------------------------------------
+## The model cost and residuals
+## -----------------------------------------------------------------------------
+# The CAKE R modules are based on mkin,
+# Call to approx is only performed if there are multiple non NA values
+# which should prevent most of the crashes - Rob Nelson (Tessella)
+#
+# Modifications developed by Tessella Plc for Syngenta, Copyright (C) 2011 Syngenta
+# Authors: Rob Nelson, Richard Smith
+# Tessella Project Reference: 6245
+#
+# This program is free software: you can
+# redistribute them and/or modify them under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>#
+#
+# $Id: CakeCost.R 216 2011-07-05 14:35:03Z nelr $
+
+CakeCost <- function (model, obs, x = "time", y = NULL, err = NULL,
+ weight = "none", scaleVar = FALSE, cost = NULL, ...) {
+
+ ## convert vector to matrix
+ if (is.vector(obs)) {
+ cn <- names(obs)
+ obs <- matrix(data = obs, nrow = 1)
+ colnames(obs) <- cn
+ }
+ if (is.vector(model)) {
+ cn <- names(model)
+ model <- matrix(data=model, nrow = 1)
+ colnames(model) <- cn
+ }
+
+ ##=============================================================================
+ ## Observations
+ ##=============================================================================
+
+ ## The position of independent variable(s)
+ ix <- 0
+ if (! is.null(x)) { # mapping required...
+ ## For now multiple independent variables are not supported...
+ if (length(x) > 1)
+ stop ("multiple independent variables in 'obs' are not yet supported")
+
+ if (! is.character(x))
+ stop ("'x' should be the *name* of the column with the independent variable in 'obs' or NULL")
+ ix <- which(colnames(obs) %in% x)
+ if (length(ix) != length(x))
+ stop(paste("Independent variable column not found in observations", x))
+ } else ix <- NULL
+
+ ## The position of weighing values
+ ierr <- 0
+ if (! is.null(err)) {
+ if (! is.character(err))
+ stop ("'err' should be the *name* of the column with the error estimates in obs or NULL")
+ ierr <- which(colnames(obs) == err) # only one
+ if (length(ierr) == 0)
+ stop(paste("Column with error estimates not found in observations", err))
+ }
+
+ ## The dependent variables
+ type <- 1 # data input type: type 2 is table format, type 1 is long format...
+
+ if (!is.null(y)) { # it is in table format; first column are names of observed data...
+
+ Names <- as.character(unique(obs[, 1])) # Names of data sets, all data should be model variables...
+ Ndat <- length(Names) # Number of data sets
+ ilist <- 1:Ndat
+ if (! is.character(y))
+ stop ("'y' should be the *name* of the column with the values of the dependent variable in obs")
+ iy <- which(colnames(obs) == y)
+ if (length(iy) == 0)
+ stop(paste("Column with value of dependent variable not found in observations", y))
+ type <- 2
+
+ } else { # it is a matrix, variable names are column names
+ Ndat <- NCOL(obs)-1
+ Names <- colnames(obs)
+ ilist <- (1:NCOL(obs)) # column positions of the (dependent) observed variables
+ exclude <- ix # exclude columns that are not
+ if (ierr > 0)
+ exclude <- c(ix, ierr) # exclude columns that are not
+ if (length(exclude) > 0)
+ ilist <- ilist[-exclude]
+ }
+
+ #================================
+ # The model results
+ #================================
+
+ ModNames <- colnames(model) # Names of model variables
+ if (length(ix) > 1) {
+ ixMod <- NULL
+
+ for ( i in 1:length(ix)) {
+ ix2 <- which(colnames(model) == x[i])
+ if (length(ix2) == 0)
+ stop(paste("Cannot calculate cost: independent variable not found in model output", x[i]))
+ ixMod <- c(ixMod, ix2)
+ }
+
+ xMod <- model[,ixMod] # Independent variable, model
+ } else if (length(ix) == 1) {
+ ixMod <- which(colnames(model) == x)
+ if (length(ixMod) == 0)
+ stop(paste("Cannot calculate cost: independent variable not found in model output", x))
+ xMod <- model[,ixMod] # Independent variable, model
+ }
+ Residual <- NULL
+ CostVar <- NULL
+
+ #================================
+ # Compare model and data...
+ #================================
+ xDat <- 0
+ iDat <- 1:nrow(obs)
+
+ for (i in ilist) { # for each observed variable ...
+ ii <- which(ModNames == Names[i])
+ if (length(ii) == 0) stop(paste("observed variable not found in model output", Names[i]))
+ yMod <- model[,ii]
+ if (type == 2) { # table format
+ iDat <- which (obs[,1] == Names[i])
+ if (length(ix) > 0) xDat <- obs[iDat, ix]
+ obsdat <- obs[iDat, iy]
+ } else {
+ if (length(ix) > 0) xDat <- obs[, 1]
+ obsdat <- obs[,i]
+ }
+ ii <- which(is.na(obsdat))
+ if (length(ii) > 0) {
+ xDat <- xDat[-ii]
+ obsdat <- obsdat[-ii]
+ }
+
+ # CAKE version - Added tests for multiple non-NA values
+ if (length(ix) > 0 && length(unique(xMod[!is.na(xMod)]))>1 && length(yMod[!is.na(yMod)])>1)
+ {
+ ModVar <- approx(xMod, yMod, xout = xDat)$y
+ }
+ else {
+ cat("CakeCost Warning: Only one valid point - using mean (yMod was", yMod, ")\n")
+ ModVar <- mean(yMod[!is.na(yMod)])
+ obsdat <- mean(obsdat)
+ }
+ iex <- which(!is.na(ModVar))
+ ModVar <- ModVar[iex]
+ obsdat <- obsdat[iex]
+ xDat <- xDat[iex]
+ if (ierr > 0) {
+ Err <- obs[iDat, ierr]
+ Err <- Err[iex]
+ } else {
+ if (weight == "std")
+ Err <- sd(obsdat)
+ else if (weight == "mean")
+ Err <- mean(abs(obsdat))
+ else if (weight == "none")
+ Err <- 1
+ else
+ stop("error: do not recognize 'weight'; should be one of 'none', 'std', 'mean'")
+ }
+ if (any(is.na(Err)))
+ stop(paste("error: cannot estimate weighing for observed variable: ", Names[i]))
+ if (min(Err) <= 0)
+ stop(paste("error: weighing for observed variable is 0 or negative:", Names[i]))
+ if (scaleVar)
+ Scale <- 1/length(obsdat)
+ else Scale <- 1
+ Res <- (ModVar- obsdat)
+ res <- Res/Err
+ resScaled <- res*Scale
+ Residual <- rbind(Residual,
+ data.frame(
+ name = Names[i],
+ x = xDat,
+ obs = obsdat,
+ mod = ModVar,
+ weight = 1/Err,
+ res.unweighted = Res,
+ res = res))
+
+ CostVar <- rbind(CostVar,
+ data.frame(
+ name = Names[i],
+ scale = Scale,
+ N = length(Res),
+ SSR.unweighted = sum(Res^2),
+ SSR.unscaled = sum(res^2),
+ SSR = sum(resScaled^2)))
+ } # end loop over all observed variables
+
+ ## SSR
+ Cost <- sum(CostVar$SSR * CostVar$scale)
+
+ Lprob <- -sum(log(pmax(0, dnorm(Residual$mod, Residual$obs, Err)))) # avoid log of negative values
+ #Lprob <- -sum(log(pmax(.Machine$double.xmin, dnorm(Residual$mod, Residual$obs, Err)))) #avoid log(0)
+
+ if (! is.null(cost)) {
+ Cost <- Cost + cost$model
+ CostVar <- rbind(CostVar, cost$var)
+ Residual <- rbind(Residual, cost$residuals)
+ Lprob <- Lprob + cost$minlogp
+ }
+ out <- list(model = Cost, cost = Cost, minlogp = Lprob, var = CostVar, residuals = Residual)
+ class(out) <- "modCost"
+ return(out)
+}
+
+## -----------------------------------------------------------------------------
+## S3 methods of modCost
+## -----------------------------------------------------------------------------
+
+plot.modCost<- function(x, legpos="topleft", ...) {
+ nvar <- nrow(x$var)
+
+ dots <- list(...)
+
+ dots$xlab <- if(is.null(dots$xlab)) "x" else dots$xlab
+ dots$ylab <- if(is.null(dots$ylab)) "weighted residuals" else dots$ylab
+ DotsPch <- if(is.null(dots$pch)) (16:24) else dots$pch
+ dots$pch <- if(is.null(dots$pch)) (16:24)[x$residuals$name] else dots$pch[x$residuals$name]
+ DotsCol <- if(is.null(dots$col)) (1:nvar) else dots$col
+ dots$col <- if(is.null(dots$col)) (1:nvar)[x$residuals$name] else dots$col[x$residuals$name]
+
+ do.call("plot", c(alist(x$residuals$x, x$residuals$res), dots))
+
+# plot(x$residuals$x, x$residuals$res, xlab="x", ylab="weighted residuals",
+# pch=c(16:24)[x$residuals$name],col=c(1:nvar)[x$residuals$name],...)
+
+ if (! is.na(legpos))
+ legend(legpos, legend = x$var$name, col = DotsCol, pch = DotsPch)
+}
+
+## -----------------------------------------------------------------------------
+## Internal cost function for optimisers
+## -----------------------------------------------------------------------------
+# Cost function. The returned structure must have $model
+# Called from the middle of IrlsFit and OlsFit
+# We need to preserve state between calls so make a closure
+CakeInternalCostFunctions <- function(mkinmod, state.ini.optim, state.ini.optim.boxnames,
+ state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar,
+ quiet, plot=FALSE, atol=1e-6){
+ cost.old <- 1e+100
+ calls <- 0
+ out_predicted <- NA
+
+ get.predicted <- function(){ out_predicted }
+
+ get.best.cost <- function(){ cost.old }
+ reset.best.cost <- function() { cost.old<<-1e+100 }
+
+ get.calls <- function(){ calls }
+ set.calls <- function(newcalls){ calls <<- newcalls }
+
+ set.error<-function(err) { observed$err <<- err }
+
+ cost <- function(P) {
+ assign("calls", calls + 1, inherits = TRUE)
+ print(P)
+ if (length(state.ini.optim) > 0) {
+ odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed)
+ names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed))
+ }
+ else odeini <- state.ini.fixed
+ odeparms <- c(P[(length(state.ini.optim) + 1):length(P)],
+ parms.fixed)
+ # Ensure initial state is at time 0
+ outtimes = unique(c(0,observed$time))
+ out <- ode(y = odeini, times = outtimes, func = mkindiff,
+ parms = odeparms, atol=atol)
+ out_transformed <- data.frame(time = out[, "time"])
+ for (var in names(mkinmod$map)) {
+ if (length(mkinmod$map[[var]]) == 1) {
+ out_transformed[var] <- out[, var]
+ }
+ else {
+ out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]])
+ }
+ }
+ assign("out_predicted", out_transformed, inherits = TRUE)
+ mC <- CakeCost(out_transformed, observed, y = "value",
+ err = 'err', scaleVar = scaleVar)
+ mC$penalties <- CakePenalties(odeparms, out_transformed, observed)
+ mC$model <- mC$cost + mC$penalties;
+ if (mC$model < cost.old) {
+ if (!quiet)
+ cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model,
+ "\n")
+ if (plot) {
+ outtimes_plot = seq(min(observed$time), max(observed$time),
+ length.out = 100)
+ out_plot <- ode(y = odeini, times = outtimes_plot,
+ func = mkindiff, parms = odeparms, atol=atol)
+ out_transformed_plot <- data.frame(time = out_plot[,
+ "time"])
+ for (var in names(mkinmod$map)) {
+ if (length(mkinmod$map[[var]]) == 1) {
+ out_transformed_plot[var] <- out_plot[, var]
+ }
+ else {
+ out_transformed_plot[var] <- rowSums(out_plot[,
+ mkinmod$map[[var]]])
+ }
+ }
+ plot(0, type = "n", xlim = range(observed$time),
+ ylim = range(observed$value, na.rm = TRUE),
+ xlab = "Time", ylab = "Observed")
+ col_obs <- pch_obs <- 1:length(obs_vars)
+ names(col_obs) <- names(pch_obs) <- obs_vars
+ for (obs_var in obs_vars) {
+ points(subset(observed, name == obs_var, c(time,
+ value)), pch = pch_obs[obs_var], col = col_obs[obs_var])
+ }
+ matlines(out_transformed_plot$time, out_transformed_plot[-1])
+ legend("topright", inset = c(0.05, 0.05), legend = obs_vars,
+ col = col_obs, pch = pch_obs, lty = 1:length(pch_obs))
+ }
+ assign("cost.old", mC$model, inherits = TRUE)
+ }
+ # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost
+ if(mC$penalties > 0){
+ #cat("Penalty adjustment: ",mC$penalties)
+ mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res)
+ }
+
+ return(mC)
+ }
+
+
+
+ list(cost=cost,
+ get.predicted=get.predicted,
+ get.calls=get.calls, set.calls=set.calls,
+ get.best.cost=get.best.cost, reset.best.cost=reset.best.cost,
+ set.error=set.error
+ )
+} \ No newline at end of file
diff --git a/CakeInit.R b/CakeInit.R
new file mode 100644
index 0000000..3e609d5
--- /dev/null
+++ b/CakeInit.R
@@ -0,0 +1,39 @@
+#$Id: CakeInit.R 221 2011-11-01 14:12:45Z nelr $
+
+# Purpose: Load the modules required by CAKE
+# Call with chdir=TRUE so it can load our source from the current directory
+# Tessella Project Reference: 6245
+
+# Copyright (C) 2011 Syngenta
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.”
+
+library(mkin)
+source("CakeModel.R")
+source("CakeOlsFit.R")
+source("CakeIrlsFit.R")
+source("CakeOlsPlot.R")
+source("CakeIrlsPlot.R")
+source("CakeNullPlot.R")
+source("CakeCost.R")
+source("CakePenalties.R")
+source("CakeMcmcFit.R")
+source("CakeSummary.R")
+
+options(width=1000)
+options(error=recover)
+options(warn=-1)
+
+# When running from C#, this must match the C# CAKE version
+CAKE.version<-"1.4"
diff --git a/CakeIrlsFit.R b/CakeIrlsFit.R
new file mode 100644
index 0000000..bf20572
--- /dev/null
+++ b/CakeIrlsFit.R
@@ -0,0 +1,147 @@
+#$Id: CakeIrlsFit.R 216 2011-07-05 14:35:03Z nelr $
+#
+# The CAKE R modules are based on mkin
+# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta
+# Authors: Rob Nelson, Richard Smith
+# Tessella Project Reference: 6245
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.”
+#
+CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)),
+ state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0,
+ upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1],
+ plot = FALSE, quiet = FALSE, err = NULL, weight = "none",
+ scaleVar = FALSE, atol=1e-6, control=list(),...)
+{
+
+### This is a modification based on the "mkinfit" function.
+### version 0.1 July 20
+###
+# This version has been modified to expect SFO parameterised as k and flow fractions
+# Based on code in IRLSkinfit
+ NAind <-which(is.na(observed$value))
+ mod_vars <- names(mkinmod$diffs)
+ observed <- subset(observed, name %in% names(mkinmod$map))
+ ERR <- rep(1,nrow(observed))
+ observed <- cbind(observed,err=ERR)
+ obs_vars = unique(as.character(observed$name))
+ if (is.null(names(parms.ini)))
+ names(parms.ini) <- mkinmod$parms
+ mkindiff <- function(t, state, parms) {
+ time <- t
+ diffs <- vector()
+ for (box in mod_vars) {
+ diffname <- paste("d", box, sep = "_")
+ diffs[diffname] <- with(as.list(c(time, state, parms)),
+ eval(parse(text = mkinmod$diffs[[box]])))
+ }
+ return(list(c(diffs)))
+ }
+ if (is.null(names(state.ini)))
+ names(state.ini) <- mod_vars
+ parms.fixed <- parms.ini[fixed_parms]
+ optim_parms <- setdiff(names(parms.ini), fixed_parms)
+ parms.optim <- parms.ini[optim_parms]
+ state.ini.fixed <- state.ini[fixed_initials]
+ optim_initials <- setdiff(names(state.ini), fixed_initials)
+ state.ini.optim <- state.ini[optim_initials]
+ state.ini.optim.boxnames <- names(state.ini.optim)
+ if (length(state.ini.optim) > 0) {
+ names(state.ini.optim) <- paste(names(state.ini.optim),
+ "0", sep = "_")
+ }
+
+ costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames,
+ state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol)
+
+ ############### Iteratively Reweighted Least Squares#############
+ ## Start with no weighting
+ fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower,
+ upper = upper,control=control,...)
+
+ if(length(control)==0)
+ {
+ irls.control <- list(maxIter=10,tol=1e-05)
+ control <- list(irls.control=irls.control)
+ }else{
+ if(is.null(control$irls.control))
+ {
+ irls.control <- list(maxIter=10,tol=1e-05)
+ control <- list(irls.control=irls.control)
+ }
+ }
+ irls.control <- control$irls.control
+ maxIter <- irls.control$maxIter
+ tol <- irls.control$tol
+ ####
+ niter <- 1
+ ## insure one IRLS iteration
+ diffsigma <- 100
+ olderr <- rep(1,length(mod_vars))
+ while(diffsigma>tol & niter<=maxIter)
+ {
+ err <- sqrt(fit$var_ms_unweighted)
+ ERR <- err[as.character(observed$name)]
+ costFunctions$set.error(ERR)
+ diffsigma <- sum((err-olderr)^2)
+ cat("IRLS iteration at",niter, "; Diff in error variance ", diffsigma,"\n")
+ olderr <- err
+ fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, control=control, ...)
+ niter <- niter+1
+
+ ### If not converged, reweight and fit
+ }
+
+ ###########################################
+ fit$mkindiff <- mkindiff
+ fit$map <- mkinmod$map
+ fit$diffs <- mkinmod$diffs
+
+ out_predicted <- costFunctions$get.predicted()
+
+ # mkin_long_to_wide does not handle ragged data
+ fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time")
+ names(fit$observed) <- c("time", as.vector(unique(observed$name)))
+
+ predicted_long <- mkin_wide_to_long(out_predicted, time = "time")
+ fit$predicted <- out_predicted
+ fit$start <- data.frame(initial = c(state.ini.optim, parms.optim))
+ fit$start$type = c(rep("state", length(state.ini.optim)),
+ rep("deparm", length(parms.optim)))
+ fit$start$lower <- lower
+ fit$start$upper <- upper
+ fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed))
+ fit$fixed$type = c(rep("state", length(state.ini.fixed)),
+ rep("deparm", length(parms.fixed)))
+
+ fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim)
+ parms.all = c(fit$par, parms.fixed)
+ fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed)
+ fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)),
+ DT90 = rep(NA, length(obs_vars)), row.names = obs_vars)
+ for (obs_var in obs_vars) {
+ type = names(mkinmod$map[[obs_var]])[1]
+ fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all)
+ }
+
+ data <- merge(observed, predicted_long, by = c("time", "name"))
+
+
+ names(data) <- c("time", "variable", "observed","err-var", "predicted")
+ data$residual <- data$observed - data$predicted
+ data$variable <- ordered(data$variable, levels = obs_vars)
+ fit$data <- data[order(data$variable, data$time), ]
+ class(fit) <- c("CakeFit", "mkinfit", "modFit")
+ return(fit)
+}
diff --git a/CakeIrlsPlot.R b/CakeIrlsPlot.R
new file mode 100644
index 0000000..2de8910
--- /dev/null
+++ b/CakeIrlsPlot.R
@@ -0,0 +1,25 @@
+#$Id: CakeIrlsPlot.R 216 2011-07-05 14:35:03Z nelr $
+# Generates fitted curves so the GUI can plot them
+# Author: Rob Nelson (Tessella)
+# Tessella Project Reference: 6245
+
+# Copyright (C) 2011 Syngenta
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.”
+
+CakeIrlsPlot <- function(fit, xlim = range(fit$data$time), ...)
+{
+ CakeOlsPlot(fit, xlim, ...)
+}
+
diff --git a/CakeMcmcFit.R b/CakeMcmcFit.R
new file mode 100644
index 0000000..209f904
--- /dev/null
+++ b/CakeMcmcFit.R
@@ -0,0 +1,292 @@
+# $Id: CakeMcmcFit.R 216 2011-07-05 14:35:03Z nelr $
+# The CAKE R modules are based on mkin,
+# Based on mcmckinfit as modified by Bayer
+# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta
+# Author: Rob Nelson
+# Tessella Project Reference: 6245
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.”
+
+CakeMcmcFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)),
+ state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0,
+ upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1],
+ plot = FALSE, quiet = FALSE, err = NULL, weight = "none",
+ scaleVar = FALSE, commonsigma=FALSE,jump = NULL,prior = NULL,
+ wvar0=0.1,niter = 1000,
+ outputlength = niter, burninlength = 0, updatecov = niter,
+ ntrydr = 1, drscale = NULL, verbose = TRUE,fitstart=TRUE,seed=NULL,atol=1e-6,...)
+{
+
+ NAind <-which(is.na(observed$value))
+ mod_vars <- names(mkinmod$diffs)
+ observed <- subset(observed, name %in% names(mkinmod$map))
+ ERR <- rep(1,nrow(observed))
+ observed <- cbind(observed,err=ERR)
+ obs_vars = unique(as.character(observed$name))
+ if (is.null(names(parms.ini)))
+ names(parms.ini) <- mkinmod$parms
+ mkindiff <- function(t, state, parms) {
+ time <- t
+ diffs <- vector()
+ for (box in mod_vars) {
+ diffname <- paste("d", box, sep = "_")
+ diffs[diffname] <- with(as.list(c(time, state, parms)),
+ eval(parse(text = mkinmod$diffs[[box]])))
+ }
+ return(list(c(diffs)))
+ }
+ if (is.null(names(state.ini)))
+ names(state.ini) <- mod_vars
+ parms.fixed <- parms.ini[fixed_parms]
+ optim_parms <- setdiff(names(parms.ini), fixed_parms)
+ parms.optim <- parms.ini[optim_parms]
+ state.ini.fixed <- state.ini[fixed_initials]
+ optim_initials <- setdiff(names(state.ini), fixed_initials)
+ state.ini.optim <- state.ini[optim_initials]
+ state.ini.optim.boxnames <- names(state.ini.optim)
+ if (length(state.ini.optim) > 0) {
+ names(state.ini.optim) <- paste(names(state.ini.optim),
+ "0", sep = "_")
+ }
+
+ costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames,
+ state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol)
+ bestIteration <- -1;
+ costWithStatus <- function(P, ...){
+ r <- costFunctions$cost(P)
+ if(r$cost == costFunctions$get.best.cost()) {
+ bestIteration<<-costFunctions$get.calls();
+ cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n')
+ }
+ cat("MCMC Call no.", costFunctions$get.calls(), '\n')
+ return(r)
+ }
+
+ # Set the seed
+ if ( is.null(seed) ) {
+ # No seed so create a random one so there is something to report
+ seed = runif(1,0,2^31-1)
+ }
+ seed = as.integer(seed)
+ set.seed(seed)
+
+ if(fitstart==TRUE)
+ {
+ ## ############# Get Initial Paramtervalues #############
+ ## Start with no weighting
+ fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower,
+ upper = upper,...)
+
+ if(commonsigma==TRUE){
+ #observed$err <- rep(1,nrow(observed))
+ cov0 <- summary(fit)$cov.scaled*2.4^2/length(fit$par)
+ costFunctions$set.calls(0); costFunctions$reset.best.cost()
+ res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose)
+ #res <- modMCMC(cost,fit$par,lower=lower,upper=upper,niter=niter)
+ }else{
+ ## ############## One reweighted estimation ############################
+ ## Estimate the error variance(sd)
+ tmpres <- fit$residuals
+ oldERR <- observed$err
+ err <- rep(NA,length(mod_vars))
+ for(i in 1:length(mod_vars))
+ {
+ box <- mod_vars[i]
+ ind <- which(names(tmpres)==box)
+ tmp <- tmpres[ind]
+ err[i] <- sd(tmp)
+ }
+ names(err) <- mod_vars
+ ERR <- err[as.character(observed$name)]
+ observed$err <-ERR
+ costFunctions$set.error(ERR)
+ olderr <- rep(1,length(mod_vars))
+ diffsigma <- sum((err-olderr)^2)
+ ## At least do one iteration step to get a weighted LS
+ fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...)
+ ## Use this as the Input for MCMC algorithm
+ ## ##########################
+ fs <- summary(fit)
+ cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par)
+ var0 <- fit$var_ms_unweighted
+ costFunctions$set.calls(0); costFunctions$reset.best.cost()
+ res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose)
+
+ }
+ }else {
+ stop('fitstart=FALSE code removed until needed')
+ }
+ # Construct the fit object to return
+ fit$start <- data.frame(initial = c(state.ini.optim, parms.optim))
+ fit$start$type = c(rep("state", length(state.ini.optim)),
+ rep("deparm", length(parms.optim)))
+ fit$start$lower <- lower
+ fit$start$upper <- upper
+ fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed))
+ fit$fixed$type = c(rep("state", length(state.ini.fixed)),
+ rep("deparm", length(parms.fixed)))
+
+ fit$mkindiff <- mkindiff
+ fit$map <- mkinmod$map
+ fit$diffs <- mkinmod$diffs
+
+ # mkin_long_to_wide does not handle ragged data
+ fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time")
+ #names(fit$observed) <- c("time", as.vector(unique(observed$name)))
+ fns <- function(str) {
+ fields <- strsplit(str, "\\.")[[1]]
+ if (fields[1] == "value") {
+ return(fields[2])
+ }
+ else {
+ return(str)
+ }
+ }
+ names(fit$observed) <- sapply(names(fit$observed), fns)
+
+ # Replace mean from modFit with mean from modMCMC
+ fnm <- function(x) mean(res$pars[,x])
+ fit$par <- sapply(dimnames(res$pars)[[2]],fnm)
+ fit$bestpar <- res$bestpar
+ fit$costfn <- costWithStatus
+
+ # Disappearence times
+ parms.all <- c(fit$par, parms.fixed)
+ obs_vars = unique(as.character(observed$name))
+ fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)),
+ DT90 = rep(NA, length(obs_vars)), row.names = obs_vars)
+ for (obs_var in obs_vars) {
+ type = names(mkinmod$map[[obs_var]])[1]
+ fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all)
+ }
+
+
+ # Fits to data
+ odeini <- state.ini
+ odeparms <- parms.all
+ # If this step modelled the start amount, include it. Lookup in vectors returns NA not null if missing
+ if(!is.na(odeparms["Parent_0"])) { odeini['Parent'] = odeparms["Parent_0"] }
+
+ # Solve the system
+ evalparse <- function(string)
+ {
+ eval(parse(text=string), as.list(c(odeparms, odeini)))
+ }
+
+ # Ensure initial state is at time 0
+ obstimes <- unique(c(0,observed$time))
+
+ out <- ode(
+ y = odeini,
+ times = obstimes,
+ func = mkindiff,
+ parms = odeparms,
+ )
+
+ # Output transformation for models with unobserved compartments like SFORB
+ out_transformed <- data.frame(time = out[,"time"])
+ for (var in names(mkinmod$map)) {
+ if(length(mkinmod$map[[var]]) == 1) {
+ out_transformed[var] <- out[, var]
+ } else {
+ out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]])
+ }
+ }
+ fit$predicted <- out_transformed
+ fit$penalties <- CakePenaltiesLong(odeparms, out_transformed, observed)
+
+ predicted_long <- mkin_wide_to_long(out_transformed,time="time")
+ obs_vars = unique(as.character(observed$name))
+ fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim)
+
+ data<-observed
+ data$err<-rep(NA,length(data$time))
+ data<-merge(data, predicted_long, by=c("time","name"))
+ names(data)<-c("time", "variable", "observed","err-var", "predicted")
+ data$residual<-data$observed-data$predicted
+ data$variable <- ordered(data$variable, levels = obs_vars)
+ fit$data <- data[order(data$variable, data$time), ]
+
+ sq <- data$residual * data$residual
+ fit$ssr <- sum(sq)
+
+ fit$seed = seed
+
+ fit$res <- res
+ class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit")
+ return(fit)
+}
+
+
+# Summarise a fit
+summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, ff = TRUE, cov = FALSE,...) {
+ param <- object$par
+ pnames <- names(param)
+ p <- length(param)
+ #covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance
+ mcmc <- object$res
+ covar <- cov(mcmc$pars)
+
+ rdf <- object$df.residual
+
+ message <- "ok"
+ rownames(covar) <- colnames(covar) <-pnames
+
+ #se <- sqrt(diag(covar) * resvar)
+ fnse <- function(x) sd(mcmc$pars[,x]) #/sqrt(length(mcmc$pars[,x]))
+ se <- sapply(dimnames(mcmc$pars)[[2]],fnse)
+
+ tval <- param / se
+
+ if (!all(object$start$lower >=0)) {
+ message <- "Note that the one-sided t-test may not be appropriate if
+ parameter values below zero are possible."
+ warning(message)
+ } else message <- "ok"
+
+ # Parameter fits
+ alpha <- 0.05
+ lci <- param + qt(alpha/2,rdf)*se
+ uci <- param + qt(1-alpha/2,rdf)*se
+ param <- cbind(param, se, tval, pt(tval, rdf, lower.tail = FALSE), lci, uci)
+ dimnames(param) <- list(pnames, c("Estimate", "Std. Error", "t value", "Pr(>t)", "Lower CI", "Upper CI"))
+
+ # Residuals from mean of MCMC fit
+ resvar <- object$ssr/ rdf
+ modVariance <- object$ssr / length(object$data$residual)
+
+ ans <- list(residuals = object$data$residuals,
+ residualVariance = resvar,
+ sigma = sqrt(resvar),
+ modVariance = modVariance,
+ df = c(p, rdf), cov.unscaled = covar,
+ cov.scaled = covar * resvar,
+ info = object$info, niter = object$iterations,
+ stopmess = message,
+ par = param)
+
+ ans$diffs <- object$diffs
+ ans$data <- object$data
+ ans$additionalstats = CakeAdditionalStats(object$data)
+ ans$seed <- object$seed
+
+ ans$start <- object$start
+ ans$fixed <- object$fixed
+ ans$errmin <- object$errmin
+ if(distimes) ans$distimes <- object$distimes
+ if(ff) ans$ff <- object$ff
+ if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB
+ class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit")
+ return(ans)
+}
diff --git a/CakeModel.R b/CakeModel.R
new file mode 100644
index 0000000..a93d7af
--- /dev/null
+++ b/CakeModel.R
@@ -0,0 +1,261 @@
+# Was Id: mkinmod.R 71 2010-09-12 01:13:36Z jranke
+# $Id: CakeModel.R 216 2011-07-05 14:35:03Z nelr $
+
+# The CAKE R modules are based on mkin
+# Portions Johannes Ranke, 2010
+# Contact: mkin-devel@lists.berlios.de
+
+# This version has been modified to parameterise SFO as k and flow fractions
+# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta
+# Tessella Project Reference: 6245
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.”
+
+CakeModel <- function(...)
+{
+ spec <- list(...)
+ obs_vars <- names(spec)
+
+ # The returned model will be a list of character vectors, containing
+ # differential equations, parameter names and a mapping from model variables
+ # to observed variables. If possible, a matrix representation of the
+ # differential equations is included
+ parms <- vector()
+ diffs <- vector()
+ map <- list()
+ if(spec[[1]]$type %in% c("FOMC", "DFOP", "HS")) {
+ mat = FALSE
+ if(!is.null(spec[[1]]$to)) {
+ message <- paste("Only constant formation fractions over time are implemented.",
+ "Depending on the reason for the time dependence of degradation, this may be unrealistic",
+ sep="\n")
+ warning(message)
+ } else message <- "ok"
+ } else mat = TRUE
+
+ mat = FALSE # XYZZY Not implemented yet assuming it should be
+ nlt = list()
+
+ # Establish list of differential equations
+ for (varname in obs_vars)
+ {
+ if(is.null(spec[[varname]]$type)) stop(
+ "Every argument to mkinmod must be a list containing a type component")
+ if(!spec[[varname]]$type %in% c("SFO", "FOMC", "DFOP", "HS", "SFORB")) stop(
+ "Available types are SFO, FOMC, DFOP, HS and SFORB only")
+ new_parms <- vector()
+
+ # New (sub)compartments (boxes) needed for the model type
+ new_boxes <- switch(spec[[varname]]$type,
+ SFO = varname,
+ FOMC = varname,
+ DFOP = varname,
+ HS = varname,
+ SFORB = paste(varname, c("free", "bound"), sep="_")
+ )
+ map[[varname]] <- new_boxes
+ names(map[[varname]]) <- rep(spec[[varname]]$type, length(new_boxes))
+
+ # Start a new differential equation for each new box
+ new_diffs <- paste("d_", new_boxes, " =", sep="")
+
+ # Turn on sink if not specified otherwise
+ if(is.null(spec[[varname]]$sink)) spec[[varname]]$sink <- TRUE
+
+ #@@@ADD SFO k HERE !!!!!!!!!!!!!
+ # Construct and add SFO term and add SFO parameters if needed
+ if(spec[[varname]]$type == "SFO") {
+ # From p. 53 of the FOCUS kinetics report
+ k_term <- paste("k", new_boxes[[1]], sep="_")
+ nonlinear_term <- paste(k_term, new_boxes[[1]], sep=" * ")
+ spec[[varname]]$nlt<-nonlinear_term
+ new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term)
+ new_parms <- k_term
+ ff <- vector()
+ }
+
+ # Construct and add FOMC term and add FOMC parameters if needed
+ if(spec[[varname]]$type == "FOMC") {
+ if(match(varname, obs_vars) != 1) {
+ stop("Type FOMC is only possible for the first compartment, which is assumed to be the source compartment")
+ }
+# if(spec[[varname]]$sink == FALSE) {
+# stop("Turning off the sink for the FOMC model is not implemented")
+# }
+ # From p. 53 of the FOCUS kinetics report
+ nonlinear_term <- paste("(alpha/beta) * ((time/beta) + 1)^-1 *", new_boxes[[1]])
+ spec[[varname]]$nlt<-nonlinear_term
+ new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term)
+ new_parms <- c("alpha", "beta")
+ ff <- vector()
+ }
+
+ # Construct and add DFOP term and add DFOP parameters if needed
+ if(spec[[varname]]$type == "DFOP") {
+ if(match(varname, obs_vars) != 1) {
+ stop("Type DFOP is only possible for the first compartment, which is assumed to be the source compartment")
+ }
+# if(spec[[varname]]$sink == FALSE) {
+# stop("Turning off the sink for the DFOP model is not implemented")
+# }
+ # From p. 57 of the FOCUS kinetics report
+ nonlinear_term <- paste("((k1 * g * exp(-k1 * time) + k2 * (1 - g) * exp(-k2 * time)) / (g * exp(-k1 * time) + (1 - g) * exp(-k2 * time))) *", new_boxes[[1]])
+ spec[[varname]]$nlt<-nonlinear_term
+ new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term)
+ new_parms <- c("k1", "k2", "g")
+ ff <- vector()
+ }
+
+ # Construct and add HS term and add HS parameters if needed
+ if(spec[[varname]]$type == "HS") {
+ if(match(varname, obs_vars) != 1) {
+ stop("Type HS is only possible for the first compartment, which is assumed to be the source compartment")
+ }
+# if(spec[[varname]]$sink == FALSE) {
+# stop("Turning off the sink for the HS model is not implemented")
+# }
+ # From p. 55 of the FOCUS kinetics report
+# nonlinear_term <- paste("ifelse(time <= tb, k1, k2)", "*", new_boxes[[1]])
+ nonlinear_term <- paste("((k1 - k2) / ( 1 + exp( 10*(time - tb) ) ) + k2)", "*", new_boxes[[1]])
+ spec[[varname]]$nlt<-nonlinear_term
+ new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term)
+ new_parms <- c("k1", "k2", "tb")
+ ff <- vector()
+ }
+
+ # Construct terms for transfer to sink and add if appropriate
+
+ #@@@@ REMOVE THIS ?????
+ if(spec[[varname]]$sink) {
+ # Add first-order sink term to first (or only) box for SFO and SFORB
+# if(spec[[varname]]$type %in% c("SFO", "SFORB")) {
+ if(spec[[varname]]$type == "SFORB") {
+ k_compound_sink <- paste("k", new_boxes[[1]], "sink", sep="_")
+ sink_term <- paste("-", k_compound_sink, "*", new_boxes[[1]])
+ new_diffs[[1]] <- paste(new_diffs[[1]], sink_term)
+ new_parms <- k_compound_sink
+ }
+ }
+
+ # Add reversible binding if appropriate
+ if(spec[[varname]]$type == "SFORB") {
+ k_free_bound <- paste("k", varname, "free", "bound", sep="_")
+ k_bound_free <- paste("k", varname, "bound", "free", sep="_")
+ reversible_binding_terms <- c(
+ paste("-", k_free_bound, "*", new_boxes[[1]], "+", k_bound_free, "*", new_boxes[[2]]),
+ paste("+", k_free_bound, "*", new_boxes[[1]], "-", k_bound_free, "*", new_boxes[[2]]))
+ new_diffs <- paste(new_diffs, reversible_binding_terms)
+ new_parms <- c(new_parms, k_free_bound, k_bound_free)
+ }
+
+ # Add observed variable to model
+ parms <- c(parms, new_parms)
+ names(new_diffs) <- new_boxes
+ diffs <- c(diffs, new_diffs)
+ }
+ # Transfer between compartments
+ for (varname in obs_vars) {
+ to <- spec[[varname]]$to
+ if(!is.null(to)) {
+ origin_box <- switch(spec[[varname]]$type,
+ SFO = varname,
+ FOMC = varname,
+ DFOP = varname,
+ HS = varname,
+ SFORB = paste(varname, "free", sep="_"))
+ fraction_left <- NULL
+ for (target in to) {
+ target_box <- switch(spec[[target]]$type,
+ SFO = target,
+ SFORB = paste(target, "free", sep="_"))
+ # SFO is no longer special
+ #if(spec[[varname]]$type %in% c("SFO", "SFORB")) {
+ if(spec[[varname]]$type == "SFORB") {
+ k_from_to <- paste("k", origin_box, target_box, sep="_")
+ diffs[[origin_box]] <- paste(diffs[[origin_box]], "-",
+ k_from_to, "*", origin_box)
+ diffs[[target_box]] <- paste(diffs[[target_box]], "+",
+ k_from_to, "*", origin_box)
+ parms <- c(parms, k_from_to)
+ }
+ # Handle SFO like the others
+# if(spec[[varname]]$type %in% c("FOMC", "DFOP", "HS")) {
+ if(spec[[varname]]$type %in% c("FOMC", "DFOP", "HS", "SFO")) {
+ if ( length(to)==1 && !spec[[varname]]$sink ) {
+ # There is only one output, so no need for any flow fractions. Just add the whole flow from the parent
+ diffs[[target_box]] <- paste(diffs[[target_box]], "+", spec[[varname]]$nlt)
+ } else {
+ fraction_to_target = paste("f", varname, "to", target, sep="_")
+ fraction_not_to_target = paste("(1 - ", fraction_to_target, ")", sep="")
+ if(is.null(fraction_left)) {
+ fraction_really_to_target = fraction_to_target
+ fraction_left = fraction_not_to_target
+ } else {
+ # If this is the last output and there is no sink, it gets what's left
+ if ( target==tail(to,1) && !spec[[varname]]$sink ) {
+ fraction_really_to_target = fraction_left
+ } else {
+# (1-fa)fb version
+# fraction_really_to_target = paste(fraction_left, " * ", fraction_to_target, sep="")
+# fraction_left = paste(fraction_left, " * ", fraction_not_to_target, sep="")
+# fb version
+ fraction_really_to_target = fraction_to_target
+ fraction_left = paste(fraction_left, " - ", fraction_to_target, sep="")
+ }
+ }
+ ff[target_box] = fraction_really_to_target
+ diffs[[target_box]] <- paste(diffs[[target_box]], "+", ff[target_box], "*", spec[[varname]]$nlt)
+ # Add the flow fraction parameter (if it exists)
+ if ( target!=tail(to,1) || spec[[varname]]$sink ) {
+ parms <- c(parms, fraction_to_target)
+ }
+ }
+ }
+ }
+ }
+ }
+ model <- list(diffs = diffs, parms = parms, map = map)
+
+ # Create coefficient matrix if appropriate
+ if (mat) {
+ boxes <- names(diffs)
+ n <- length(boxes)
+ m <- matrix(nrow=n, ncol=n, dimnames=list(boxes, boxes))
+ for (from in boxes) {
+ for (to in boxes) {
+ if (from == to) {
+ #@@@@ OMIT NEXT LINE? !!!!!
+ k.candidate = paste("k", from, c(boxes, "sink"), sep="_")
+ k.candidate = sub("free.*bound", "free_bound", k.candidate)
+ k.candidate = sub("bound.*free", "bound_free", k.candidate)
+ k.effective = intersect(model$parms, k.candidate)
+ m[from,to] = ifelse(length(k.effective) > 0,
+ paste("-", k.effective, collapse = " "), "0")
+ } else {
+ k.candidate = paste("k", from, to, sep="_")
+ k.candidate = sub("free.*bound", "free_bound", k.candidate)
+ k.candidate = sub("bound.*free", "bound_free", k.candidate)
+ k.effective = intersect(model$parms, k.candidate)
+ m[to, from] = ifelse(length(k.effective) > 0,
+ k.effective, "0")
+ }
+ }
+ }
+ model$coefmat <- m
+ }
+
+ if (exists("ff")) model$ff = ff
+ class(model) <- "mkinmod"
+ invisible(model)
+}
diff --git a/CakeNullPlot.R b/CakeNullPlot.R
new file mode 100644
index 0000000..3eacd67
--- /dev/null
+++ b/CakeNullPlot.R
@@ -0,0 +1,127 @@
+#$Id: CakeNullPlot.R 216 2011-07-05 14:35:03Z nelr $
+# Generates model curves so the GUI can plot them. Used when all params are fixed so there was no fit
+# The CAKE R modules are based on mkin
+# Based on code in IRLSkinfit
+# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta
+# Author: Rob Nelson (Tessella)
+# Tessella Project Reference: 6245
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.”
+
+CakeNullPlot <- function(model, parms.ini, state.ini, observed, xlim = range(observed$time), digits = max(3, getOption("digits") - 3), ...)
+{
+ # Print the fixed values
+ fixed <- data.frame(value = c(state.ini, parms.ini))
+ fixed$type = c(rep("state", length(state.ini)), rep("deparm", length(parms.ini)))
+ cat("\nFixed parameter values:\n")
+ print(fixed)
+
+ # Print disappearence times
+ obs_vars = unique(as.character(observed$name))
+ distimes <- data.frame(DT50 = rep(NA, length(obs_vars)),
+ DT90 = rep(NA, length(obs_vars)), row.names = obs_vars)
+ for (obs_var in obs_vars) {
+ type = names(model$map[[obs_var]])[1]
+ if(exists("DT50")) distimes[obs_var, ] = c(DT50, DT90)
+ distimes[obs_var, ] = CakeDT(type,obs_var,parms.ini)
+ }
+ cat("\nEstimated disappearance times:\n")
+ print(distimes, digits=digits,...)
+
+ # Plot the model
+
+ outtimes <- seq(0, xlim[2], length.out=101)
+
+ odeini <- state.ini
+ odenames <- model$parms
+ odeparms <- params.ini
+
+ # Solve the system
+ evalparse <- function(string)
+ {
+ eval(parse(text=string), as.list(c(odeparms, odeini)))
+ }
+
+ mod_vars <- names(model$diffs)
+ mkindiff <- function(t, state, parms) {
+ time <- t
+ diffs <- vector()
+ for (box in mod_vars) {
+ diffname <- paste("d", box, sep = "_")
+ diffs[diffname] <- with(as.list(c(time, state, parms)),
+ eval(parse(text = model$diffs[[box]])))
+ }
+ return(list(c(diffs)))
+ }
+
+ # Ensure initial state is at time 0
+ obstimes <- unique(c(0,observed$time))
+ out <- ode(
+ y = odeini,
+ times = obstimes,
+ func = mkindiff,
+ parms = odeparms,
+ )
+
+ # Output transformation for models with unobserved compartments like SFORB
+ out_transformed <- data.frame(time = out[,"time"])
+ for (var in names(model$map)) {
+ if(length(model$map[[var]]) == 1) {
+ out_transformed[var] <- out[, var]
+ } else {
+ out_transformed[var] <- rowSums(out[, model$map[[var]]])
+ }
+ }
+
+ predicted_long <- mkin_wide_to_long(out_transformed,time="time")
+ # Calculate chi2 error levels according to FOCUS (2006)
+ errmin<-CakeChi2(observed, predicted_long, obs_vars, c(), c())
+ cat("\nChi2 error levels in percent:\n")
+ errmin$err.min <- 100 * errmin$err.min
+ print(errmin, digits=digits,...)
+
+
+ # Fit to data
+ data<-observed
+ data$err<-rep(NA,length(data$time))
+ data<-merge(data, predicted_long, by=c("time","name"))
+ names(data)<-c("time", "variable", "observed","err-var", "predicted")
+ data$residual<-data$observed-data$predicted
+ data<-data[order(data$variable,data$time),]
+ cat("\nData:\n")
+ print(format(data, digits = digits, scientific = FALSE,...), row.names = FALSE)
+
+
+ out <- ode(
+ y = odeini,
+ times = outtimes,
+ func = mkindiff,
+ parms = odeparms,
+ )
+
+ # Output transformation for models with unobserved compartments like SFORB
+ out_transformed <- data.frame(time = out[,"time"])
+ for (var in names(model$map)) {
+ if(length(model$map[[var]]) == 1) {
+ out_transformed[var] <- out[, var]
+ } else {
+ out_transformed[var] <- rowSums(out[, model$map[[var]]])
+ }
+ }
+
+ cat("\n<PLOT MODEL START>\n")
+ print(out_transformed)
+ cat("<PLOT MODEL END>\n")
+}
+
diff --git a/CakeOlsFit.R b/CakeOlsFit.R
new file mode 100644
index 0000000..75ac471
--- /dev/null
+++ b/CakeOlsFit.R
@@ -0,0 +1,326 @@
+# Originally: mkinfit.R 87 2010-12-09 07:31:59Z jranke $
+
+# Based on code in mkinfit
+# Portions Johannes Ranke 2010
+# Contact: mkin-devel@lists.berlios.de
+# The summary function is an adapted and extended version of summary.modFit
+# from the FME package, v 1.1 by Soetart and Petzoldt, which was in turn
+# inspired by summary.nls.lm
+
+#$Id: CakeOlsFit.R 216 2011-07-05 14:35:03Z nelr $
+# This version has been modified to expect SFO parameterised as k and flow fractions
+# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta
+# Authors: Rob Nelson, Richard Smith
+# Tessella Project Reference: 6245
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.”
+
+CakeOlsFit <- function(mkinmod, observed,
+ parms.ini = rep(0.1, length(mkinmod$parms)),
+ state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)),
+ lower = 0, upper = Inf,
+ fixed_parms = NULL,
+ fixed_initials = names(mkinmod$diffs)[-1],
+ eigen = TRUE,
+ plot = FALSE, quiet = FALSE,
+ err = NULL, weight = "none", scaleVar = FALSE,
+ atol = 1e-6,
+ ...)
+{
+ mod_vars <- names(mkinmod$diffs)
+ # Subset dataframe with mapped (modelled) variables
+ observed <- subset(observed, name %in% names(mkinmod$map))
+ # Get names of observed variables
+ obs_vars = unique(as.character(observed$name))
+
+ # Name the parameters if they are not named yet
+ if(is.null(names(parms.ini))) names(parms.ini) <- mkinmod$parms
+
+ # Name the inital parameter values if they are not named yet
+ if(is.null(names(state.ini))) names(state.ini) <- mod_vars
+
+ # Parameters to be optimised
+ parms.fixed <- parms.ini[fixed_parms]
+ optim_parms <- setdiff(names(parms.ini), fixed_parms)
+ parms.optim <- parms.ini[optim_parms]
+
+ state.ini.fixed <- state.ini[fixed_initials]
+ optim_initials <- setdiff(names(state.ini), fixed_initials)
+ state.ini.optim <- state.ini[optim_initials]
+ state.ini.optim.boxnames <- names(state.ini.optim)
+ if(length(state.ini.optim) > 0) {
+ names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_")
+ }
+
+ # Decide if the solution of the model can be based on a simple analytical
+ # formula, the spectral decomposition of the matrix (fundamental system)
+ # or a numeric ode solver from the deSolve package
+ if (length(mkinmod$map) == 1) {
+ solution = "analytical"
+ } else {
+ if (is.matrix(mkinmod$coefmat) & eigen) solution = "eigen"
+ else solution = "deSolve"
+ }
+
+ # Create a function calculating the differentials specified by the model
+ # if necessary
+ if(solution == "deSolve") {
+ mkindiff <- function(t, state, parms) {
+ time <- t
+ diffs <- vector()
+ for (box in mod_vars)
+ {
+ diffname <- paste("d", box, sep="_")
+ diffs[diffname] <- with(as.list(c(time,state, parms)),
+ eval(parse(text=mkinmod$diffs[[box]])))
+ }
+ return(list(c(diffs)))
+ }
+ }
+
+ cost.old <- 1e100
+ calls <- 0
+ out_predicted <- NA
+ # Define the model cost function
+ cost <- function(P)
+ {
+ assign("calls", calls+1, inherits=TRUE)
+ if(length(state.ini.optim) > 0) {
+ odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed)
+ names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed))
+ } else odeini <- state.ini.fixed
+
+ odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed)
+
+ # Ensure initial state is at time 0
+ outtimes = unique(c(0,observed$time))
+ evalparse <- function(string)
+ {
+ eval(parse(text=string), as.list(c(odeparms, odeini)))
+ }
+
+ # Solve the system
+ if (solution == "analytical") {
+ parent.type = names(mkinmod$map[[1]])[1]
+ parent.name = names(mkinmod$diffs)[[1]]
+ o <- switch(parent.type,
+ SFO = SFO.solution(outtimes,
+ evalparse(parent.name),
+ evalparse(paste("k", parent.name, sep="_"))),
+# evalparse("k")),
+# evalparse(paste("k", parent.name, "sink", sep="_"))),
+ FOMC = FOMC.solution(outtimes,
+ evalparse(parent.name),
+ evalparse("alpha"), evalparse("beta")),
+ DFOP = DFOP.solution(outtimes,
+ evalparse(parent.name),
+ evalparse("k1"), evalparse("k2"),
+ evalparse("g")),
+ HS = HS.solution(outtimes,
+ evalparse(parent.name),
+ evalparse("k1"), evalparse("k2"),
+ evalparse("tb")),
+ SFORB = SFORB.solution(outtimes,
+ evalparse(parent.name),
+ evalparse(paste("k", parent.name, "bound", sep="_")),
+ evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")),
+ evalparse(paste("k", parent.name, "sink", sep="_")))
+ )
+ out <- cbind(outtimes, o)
+ dimnames(out) <- list(outtimes, c("time", sub("_free", "", parent.name)))
+ }
+ if (solution == "eigen") {
+ coefmat.num <- matrix(sapply(as.vector(mkinmod$coefmat), evalparse),
+ nrow = length(mod_vars))
+ e <- eigen(coefmat.num)
+ c <- solve(e$vectors, odeini)
+ f.out <- function(t) {
+ e$vectors %*% diag(exp(e$values * t), nrow=length(mod_vars)) %*% c
+ }
+ o <- matrix(mapply(f.out, outtimes),
+ nrow = length(mod_vars), ncol = length(outtimes))
+ dimnames(o) <- list(mod_vars, outtimes)
+ out <- cbind(time = outtimes, t(o))
+ }
+ if (solution == "deSolve")
+ {
+ out <- ode(
+ y = odeini,
+ times = outtimes,
+ func = mkindiff,
+ parms = odeparms,
+ atol = atol
+ )
+ }
+
+ # Output transformation for models with unobserved compartments like SFORB
+ out_transformed <- data.frame(time = out[,"time"])
+ for (var in names(mkinmod$map)) {
+ if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") {
+ out_transformed[var] <- out[, var]
+ } else {
+ out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]])
+ }
+ }
+ assign("out_predicted", out_transformed, inherits=TRUE)
+
+ mC <- CakeCost(out_transformed, observed, y = "value",
+ err = err, weight = weight, scaleVar = scaleVar)
+ mC$penalties <- CakePenalties(odeparms, out_transformed, observed)
+ mC$model <- mC$cost + mC$penalties;
+ if (mC$model < cost.old) {
+ if (!quiet)
+ cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model,
+ "\n")
+
+ # Plot the data and current model output if requested
+ if(plot) {
+ outtimes_plot = seq(min(observed$time), max(observed$time), length.out=100)
+ if (solution == "analytical") {
+ o_plot <- switch(parent.type,
+ SFO = SFO.solution(outtimes_plot,
+ evalparse(parent.name),
+ evalparse(paste("k", parent.name, sep="_"))),
+# evalparse(paste("k", parent.name, "sink", sep="_"))),
+ FOMC = FOMC.solution(outtimes_plot,
+ evalparse(parent.name),
+ evalparse("alpha"), evalparse("beta")),
+ DFOP = DFOP.solution(outtimes_plot,
+ evalparse(parent.name),
+ evalparse("k1"), evalparse("k2"),
+ evalparse("g")),
+ HS = HS.solution(outtimes_plot,
+ evalparse(parent.name),
+ evalparse("k1"), evalparse("k2"),
+ evalparse("tb")),
+ SFORB = SFORB.solution(outtimes_plot,
+ evalparse(parent.name),
+ evalparse(paste("k", parent.name, "bound", sep="_")),
+ evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")),
+ evalparse(paste("k", parent.name, "sink", sep="_")))
+ )
+ out_plot <- cbind(outtimes_plot, o_plot)
+ dimnames(out_plot) <- list(outtimes_plot, c("time", sub("_free", "", parent.name)))
+ }
+ if(solution == "eigen") {
+ o_plot <- matrix(mapply(f.out, outtimes_plot),
+ nrow = length(mod_vars), ncol = length(outtimes_plot))
+ dimnames(o_plot) <- list(mod_vars, outtimes_plot)
+ out_plot <- cbind(time = outtimes_plot, t(o_plot))
+ }
+ if (solution == "deSolve") {
+ out_plot <- ode(
+ y = odeini,
+ times = outtimes_plot,
+ func = mkindiff,
+ parms = odeparms)
+ }
+ out_transformed_plot <- data.frame(time = out_plot[,"time"])
+ for (var in names(mkinmod$map)) {
+ if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") {
+ out_transformed_plot[var] <- out_plot[, var]
+ } else {
+ out_transformed_plot[var] <- rowSums(out_plot[, mkinmod$map[[var]]])
+ }
+ }
+ out_transformed_plot <<- out_transformed_plot
+
+ plot(0, type="n",
+ xlim = range(observed$time), ylim = range(observed$value, na.rm=TRUE),
+ xlab = "Time", ylab = "Observed")
+ col_obs <- pch_obs <- 1:length(obs_vars)
+ names(col_obs) <- names(pch_obs) <- obs_vars
+ for (obs_var in obs_vars) {
+ points(subset(observed, name == obs_var, c(time, value)),
+ pch = pch_obs[obs_var], col = col_obs[obs_var])
+ }
+ matlines(out_transformed_plot$time, out_transformed_plot[-1])
+ legend("topright", inset=c(0.05, 0.05), legend=obs_vars,
+ col=col_obs, pch=pch_obs, lty=1:length(pch_obs))
+ }
+
+ assign("cost.old", mC$model, inherits=TRUE)
+ }
+ # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost
+ mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res)
+
+ return(mC)
+ }
+ fit <-modFit(cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, ...)
+
+ # We need to return some more data for summary and plotting
+ fit$solution <- solution
+ if (solution == "eigen") {
+ fit$coefmat <- mkinmod$coefmat
+ }
+ if (solution == "deSolve") {
+ fit$mkindiff <- mkindiff
+ }
+ if (plot == TRUE) {
+ fit$out_transformed_plot = out_transformed_plot
+ }
+
+ # We also need various other information for summary and plotting
+ fit$map <- mkinmod$map
+ fit$diffs <- mkinmod$diffs
+
+ # mkin_long_to_wide does not handle ragged data
+# fit$observed <- mkin_long_to_wide(observed)
+ fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time")
+ names(fit$observed) <- c("time", as.vector(unique(observed$name)))
+
+ predicted_long <- mkin_wide_to_long(out_predicted, time = "time")
+ fit$predicted <- out_predicted
+
+ # Collect initial parameter values in two dataframes
+ fit$start <- data.frame(initial = c(state.ini.optim, parms.optim))
+ fit$start$type = c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim)))
+ fit$start$lower <- lower
+ fit$start$upper <- upper
+
+ fit$fixed <- data.frame(
+ value = c(state.ini.fixed, parms.fixed))
+ fit$fixed$type = c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed)))
+
+# # Calculate chi2 error levels according to FOCUS (2006)
+ fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim)
+
+ # Calculate dissipation times DT50 and DT90 and formation fractions
+ parms.all = c(fit$par, parms.fixed)
+ fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)),
+ row.names = obs_vars)
+ fit$ff <- vector()
+ fit$SFORB <- vector()
+ for (obs_var in obs_vars) {
+ type = names(mkinmod$map[[obs_var]])[1]
+
+ fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all)
+ }
+
+ fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed)
+
+ # Collect observed, predicted and residuals
+ data<-observed
+ data$err<-rep(NA,length(data$time))
+ data <- merge(data, predicted_long, by = c("time", "name"))
+ names(data)<-c("time", "variable", "observed","err-var", "predicted")
+ data$residual <- data$observed - data$predicted
+ data$variable <- ordered(data$variable, levels = obs_vars)
+ fit$data <- data[order(data$variable, data$time), ]
+ fit$atol <- atol
+
+ class(fit) <- c("CakeFit", "mkinfit", "modFit")
+ return(fit)
+}
+
diff --git a/CakeOlsPlot.R b/CakeOlsPlot.R
new file mode 100644
index 0000000..199cc28
--- /dev/null
+++ b/CakeOlsPlot.R
@@ -0,0 +1,117 @@
+#$Id: CakeOlsPlot.R 216 2011-07-05 14:35:03Z nelr $
+# Generates fitted curves so the GUI can plot them
+# Based on code in IRLSkinfit
+# Author: Rob Nelson (Tessella)
+# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta
+# Tessella Project Reference: 6245
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.”
+
+CakeOlsPlot <- function(fit, xlim = range(fit$data$time), ...)
+{
+ cat("<PLOT MODEL START>\n")
+
+ solution = fit$solution
+ if ( is.null(solution) ) {
+ solution <- "deSolve"
+ }
+ atol = fit$atol
+ if ( is.null(atol) ) {
+ atol = 1.0e-6
+ }
+
+ fixed <- fit$fixed$value
+ names(fixed) <- rownames(fit$fixed)
+ parms.all <- c(fit$par, fixed)
+ ininames <- c(
+ rownames(subset(fit$start, type == "state")),
+ rownames(subset(fit$fixed, type == "state")))
+ odeini <- parms.all[ininames]
+ names(odeini) <- names(fit$diffs)
+
+ outtimes <- seq(0, xlim[2], length.out=101)
+
+ odenames <- c(
+ rownames(subset(fit$start, type == "deparm")),
+ rownames(subset(fit$fixed, type == "deparm")))
+ odeparms <- parms.all[odenames]
+
+ # Solve the system
+ evalparse <- function(string)
+ {
+ eval(parse(text=string), as.list(c(odeparms, odeini)))
+ }
+ if (solution == "analytical") {
+ parent.type = names(fit$map[[1]])[1]
+ parent.name = names(fit$diffs)[[1]]
+ o <- switch(parent.type,
+ SFO = SFO.solution(outtimes,
+ evalparse(parent.name),
+ evalparse(paste("k", parent.name, sep="_"))),
+ FOMC = FOMC.solution(outtimes,
+ evalparse(parent.name),
+ evalparse("alpha"), evalparse("beta")),
+ DFOP = DFOP.solution(outtimes,
+ evalparse(parent.name),
+ evalparse("k1"), evalparse("k2"),
+ evalparse("g")),
+ HS = HS.solution(outtimes,
+ evalparse(parent.name),
+ evalparse("k1"), evalparse("k2"),
+ evalparse("tb")),
+ SFORB = SFORB.solution(outtimes,
+ evalparse(parent.name),
+ evalparse(paste("k", parent.name, "free_bound", sep="_")),
+ evalparse(paste("k", parent.name, "bound_free", sep="_")),
+ evalparse(paste("k", parent.name, "free_sink", sep="_")))
+ )
+ out <- cbind(outtimes, o)
+ dimnames(out) <- list(outtimes, c("time", parent.name))
+ }
+ if (solution == "eigen") {
+ coefmat.num <- matrix(sapply(as.vector(fit$coefmat), evalparse),
+ nrow = length(odeini))
+ e <- eigen(coefmat.num)
+ c <- solve(e$vectors, odeini)
+ f.out <- function(t) {
+ e$vectors %*% diag(exp(e$values * t), nrow=length(odeini)) %*% c
+ }
+ o <- matrix(mapply(f.out, outtimes),
+ nrow = length(odeini), ncol = length(outtimes))
+ dimnames(o) <- list(names(odeini), NULL)
+ out <- cbind(time = outtimes, t(o))
+ }
+ if (solution == "deSolve") {
+ out <- ode(
+ y = odeini,
+ times = outtimes,
+ func = fit$mkindiff,
+ parms = odeparms,
+ atol = atol
+ )
+ }
+
+ # Output transformation for models with unobserved compartments like SFORB
+ out_transformed <- data.frame(time = out[,"time"])
+ for (var in names(fit$map)) {
+ if(length(fit$map[[var]]) == 1) {
+ out_transformed[var] <- out[, var]
+ } else {
+ out_transformed[var] <- rowSums(out[, fit$map[[var]]])
+ }
+ }
+ print(out_transformed)
+
+ cat("<PLOT MODEL END>\n")
+}
diff --git a/CakePenalties.R b/CakePenalties.R
new file mode 100644
index 0000000..3b2a0df
--- /dev/null
+++ b/CakePenalties.R
@@ -0,0 +1,56 @@
+# $Id: CakePenalties.R 216 2011-07-05 14:35:03Z nelr $
+# The CAKE R modules are based on mkin
+# CAKE (6245), by Tessella, for Syngenta: Copyright (C) 2011 Syngenta
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.”
+
+# Penalty function for fits that should not be accepted
+CakePenalties<-function(params, modelled, obs, penalty.scale = 10, ...){
+ # Because the model cost is roughly proportional to points,
+ # make the penalties scale that way too for consistent behaviour
+ # with many or few data points
+ sum(CakePenaltiesLong(params, modelled, obs, penalty.scale, ...)$value) * dim(obs)[[1]]
+}
+CakePenaltiesLong<-function(params, modelled, obs, penalty.scale = 10, ...){
+ r<-data.frame(name=character(0), value=numeric(0))
+
+ # Flow fractions > 1
+ #-------------------------------
+ fails<-CakePenaltiesFF(params);
+ if(dim(fails)[[1]] > 0){
+ #print("Penalty failures:")
+ #print(fails)
+ # Add a failure record for each flow fraction that missed
+ names(fails)<-c('name', 'value')
+ fails$value = 100*(fails$value - 1)
+ fails$name = lapply(fails$name, function(x) paste("FF_", x, sep=""))
+ r <- rbind(r, fails)
+ #totalPenalties <- totalPenalties + 100*sum(fails$x - 1) # Penalty for failure
+ }
+
+ r$value <- r$value * penalty.scale
+ return(r)
+}
+
+CakePenaltiesFF<-function(params){
+ ff.values<-params["f_"==substr(names(params), 0, 2)] # Flow fractions
+ if(length(ff.values) > 0){
+ ffs<-data.frame(t(sapply(strsplit(names(ff.values), '_'), FUN=function(x){ x[c(2,4)] }))) # Split into source/dest
+ names(ffs)<-c('source', 'dest')
+ ffs['value'] <- ff.values # Add values
+ sums<-aggregate(ffs$value, list(s=ffs$source), sum) # Sum of flows from each source
+ fails<-sums[sums$x>1.00001,] # All compartments > 1
+ } else { fails<-data.frame(s=character(0), x=numeric(0)) }
+ return(fails)
+} \ No newline at end of file
diff --git a/CakeSummary.R b/CakeSummary.R
new file mode 100644
index 0000000..ec42e75
--- /dev/null
+++ b/CakeSummary.R
@@ -0,0 +1,301 @@
+# $Id: CakeSummary.R 216 2011-07-05 14:35:03Z nelr $
+# CakeSummary: Functions to calculate statistics used in the summary,
+# and display the summary itself.
+
+# Parts modified from package mkin
+# Parts Tessella (Rob Nelson/Richard Smith) for Syngenta: Copyright (C) 2011 Syngenta
+# Tessella Project Reference: 6245
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.”
+
+# Calculate the decay times for a compartment
+# Arguments
+# type - type of compartment
+# obs_var - compartment name
+# parms.all - list of parameters
+CakeDT<-function(type, obs_var, parms.all) {
+ if (type == "SFO") {
+ # Changed to new parameterisation
+ #k_names = grep(paste("k", obs_var, sep="_"), names(parms.all), value=TRUE)
+ k_name = paste("k", obs_var, sep="_")
+ k_tot = parms.all[k_name]
+ DT50 = log(2)/k_tot
+ DT90 = log(10)/k_tot
+ }
+ if (type == "FOMC") {
+ alpha = parms.all["alpha"]
+ beta = parms.all["beta"]
+ DT50 = beta * (2^(1/alpha) - 1)
+ DT90 = beta * (10^(1/alpha) - 1)
+ }
+ if (type == "DFOP") {
+ k1 = parms.all["k1"]
+ k2 = parms.all["k2"]
+ g = parms.all["g"]
+ f <- function(t, x) {
+ fraction <- g * exp( - k1 * t) + (1 - g) * exp( - k2 * t)
+ (fraction - (1 - x/100))^2
+ }
+ DTmax <- 1000
+ DT50.o <- optimize(f, c(0.001, DTmax), x=50)$minimum
+ DT50 = ifelse(DTmax - DT50.o < 0.1, NA, DT50.o)
+ DT90.o <- optimize(f, c(0.001, DTmax), x=90)$minimum
+ DT90 = ifelse(DTmax - DT90.o < 0.1, NA, DT90.o)
+ }
+ if (type == "HS") {
+ k1 = parms.all["k1"]
+ k2 = parms.all["k2"]
+ tb = parms.all["tb"]
+ DTx <- function(x) {
+ DTx.a <- (log(100/(100 - x)))/k1
+ DTx.b <- tb + (log(100/(100 - x)) - k1 * tb)/k2
+ if (DTx.a < tb) DTx <- DTx.a
+ else DTx <- DTx.b
+ return(DTx)
+ }
+ DT50 <- DTx(50)
+ DT90 <- DTx(90)
+ }
+ if (type == "SFORB") {
+ # FOCUS kinetics (2006), p. 60 f
+ k_out_names = grep(paste("k", obs_var, "free", sep="_"), names(parms.all), value=TRUE)
+ k_out_names = setdiff(k_out_names, paste("k", obs_var, "free", "bound", sep="_"))
+ k_1output = sum(parms.all[k_out_names])
+ k_12 = parms.all[paste("k", obs_var, "free", "bound", sep="_")]
+ k_21 = parms.all[paste("k", obs_var, "bound", "free", sep="_")]
+
+ sqrt_exp = sqrt(1/4 * (k_12 + k_21 + k_1output)^2 + k_12 * k_21 - (k_12 + k_1output) * k_21)
+ b1 = 0.5 * (k_12 + k_21 + k_1output) + sqrt_exp
+ b2 = 0.5 * (k_12 + k_21 + k_1output) - sqrt_exp
+
+ SFORB_fraction = function(t) {
+ ((k_12 + k_21 - b1)/(b2 - b1)) * exp(-b1 * t) +
+ ((k_12 + k_21 - b2)/(b1 - b2)) * exp(-b2 * t)
+ }
+ f_50 <- function(t) (SFORB_fraction(t) - 0.5)^2
+ max_DT <- 1000
+ DT50.o <- optimize(f_50, c(0.01, max_DT))$minimum
+ if (abs(DT50.o - max_DT) < 0.01) DT50 = NA else DT50 = DT50.o
+ f_90 <- function(t) (SFORB_fraction(t) - 0.1)^2
+ DT90.o <- optimize(f_90, c(0.01, 1000))$minimum
+ if (abs(DT90.o - max_DT) < 0.01) DT90 = NA else DT90 = DT90.o
+ }
+ ret<-c(DT50, DT90)
+ names(ret)<-c("DT50","DT90")
+ ret
+}
+
+# Compute chi2 etc
+# Arguments
+# observed - data.frame of observed data
+# predicted_long - fitted values etc
+# obs_vars - compartment names
+# parms.optim - list of fitted parameters
+# state,ini.optim - list of fitted initial values
+#
+CakeChi2<-function(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) {
+ # Calculate chi2 error levels according to FOCUS (2006)
+ means <- aggregate(value ~ time + name, data = observed, mean, na.rm=TRUE)
+ errdata <- merge(means, predicted_long, by = c("time", "name"), suffixes = c("_mean", "_pred"))
+ errdata <- errdata[order(errdata$time, errdata$name), ]
+ errmin.overall <- mkinerrmin(errdata, length(parms.optim) + length(state.ini.optim))
+
+ errmin <- data.frame(err.min = errmin.overall$err.min,
+ n.optim = errmin.overall$n.optim, df = errmin.overall$df)
+ rownames(errmin) <- "All data"
+ for (obs_var in obs_vars)
+ {
+ errdata.var <- subset(errdata, name == obs_var)
+ n.k.optim <- length(grep(paste("k", obs_var, sep="_"), names(parms.optim)))
+ n.initials.optim <- length(grep(paste(obs_var, ".*", "_0", sep=""), names(state.ini.optim)))
+ n.optim <- n.k.optim + n.initials.optim
+ if ("alpha" %in% names(parms.optim)) n.optim <- n.optim + 1
+ if ("beta" %in% names(parms.optim)) n.optim <- n.optim + 1
+ if ("k1" %in% names(parms.optim)) n.optim <- n.optim + 1
+ if ("k2" %in% names(parms.optim)) n.optim <- n.optim + 1
+ if ("g" %in% names(parms.optim)) n.optim <- n.optim + 1
+ if ("tb" %in% names(parms.optim)) n.optim <- n.optim + 1
+ errmin.tmp <- mkinerrmin(errdata.var, n.optim)
+ errmin[obs_var, c("err.min", "n.optim", "df")] <- errmin.tmp
+ }
+ errmin
+}
+
+# Compute additional statistics (rē/efficiency) of obs v pred for each compartment and the whole problem
+CakeAdditionalStats<-function(obsvspred){
+ agg<-aggregate(obsvspred[c('observed', 'err-var', 'predicted', 'residual')], list(name=obsvspred$variable), function(x){x}, simplify=FALSE)
+ frames<-apply(agg, 1, as.data.frame);
+ names(frames)<-agg$name;
+ frames$all<-obsvspred
+
+ t(sapply(frames, function(frame){
+ # This function takes a data frame for a single variable and makes the stats for it
+ # rē: FOCUS eq 6.5 p. 97
+ # Efficiency: eq 6.6 p. 99
+ do <- frame$observed - mean(frame$observed)
+ dp <- frame$predicted - mean(frame$predicted)
+ r2 <- ( sum(do * dp) / sqrt(sum(do^2) * sum(dp^2)) ) ^ 2
+ eff <- 1 - ( sum((frame$predicted - frame$observed)^2) / sum(do^2) )
+ list(r2=r2, eff=eff)
+ }))
+}
+
+# Summarise a fit - used for CakeIrlsFit and CakeOlsFit
+summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, ff = TRUE, cov = FALSE,...) {
+ param <- object$par
+ pnames <- names(param)
+ p <- length(param)
+ covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance
+
+ rdf <- object$df.residual
+ resvar <- object$ssr / rdf
+ modVariance <- object$ssr / length(object$residuals)
+
+ if (!is.numeric(covar)) {
+# message <- "Cannot estimate covariance; system is singular"
+# warning(message)
+# covar <- matrix(data = NA, nrow = p, ncol = p)
+ covar=NULL
+ } else {
+ message <- "ok"
+ rownames(covar) <- colnames(covar) <-pnames
+ se <- sqrt(diag(covar) * resvar)
+ names(se) <- pnames
+ tval <- param / se
+ }
+
+ if (!all(object$start$lower >=0)) {
+ message <- "Note that the one-sided t-test may not be appropriate if
+ parameter values below zero are possible."
+ warning(message)
+ } else message <- "ok"
+
+ if(cov && !is.null(covar)) {
+ alpha <- 0.05
+ lci <- param + qt(alpha/2,rdf)*se
+ uci <- param + qt(1-alpha/2,rdf)*se
+ param <- cbind(param, se, tval, pt(tval, rdf, lower.tail = FALSE), lci, uci)
+ dimnames(param) <- list(pnames, c("Estimate", "Std. Error",
+ "t value", "Pr(>t)", "Lower CI", "Upper CI"))
+ ans <- list(residuals = object$residuals,
+ residualVariance = resvar,
+ sigma = sqrt(resvar),
+ modVariance = modVariance,
+ df = c(p, rdf), cov.unscaled = covar,
+ cov.scaled = covar * resvar,
+ info = object$info, niter = object$iterations,
+ stopmess = message,
+ par = param)
+ } else {
+ param <- cbind(param)
+ ans <- list(residuals = object$residuals,
+ residualVariance = resvar,
+ sigma = sqrt(resvar),
+ modVariance = modVariance,
+ df = c(p, rdf),
+ info = object$info, niter = object$iterations,
+ stopmess = message,
+ par = param)
+ }
+
+ ans$diffs <- object$diffs
+ if(data) {
+ ans$data <- object$data
+ ans$additionalstats = CakeAdditionalStats(object$data)
+ }
+ ans$start <- object$start
+ ans$fixed <- object$fixed
+ ans$errmin <- object$errmin
+ ans$penalties <- object$penalties
+ if(distimes) ans$distimes <- object$distimes
+ if(ff) ans$ff <- object$ff
+ if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB
+ class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit")
+ return(ans)
+}
+
+# Print a summary. Used for CakeIrlsFit, CakeOlsFit and CakeMcmcFit
+# Expanded from print.summary.modFit
+print.summary.CakeFit <- function(x, digits = max(3, getOption("digits") - 3), ...) {
+ cat("\nEquations:\n")
+ print(noquote(as.character(x[["diffs"]])))
+ df <- x$df
+ rdf <- df[2]
+
+ cat("\nStarting values for optimised parameters:\n")
+ print(x$start)
+
+ cat("\nFixed parameter values:\n")
+ if(length(x$fixed$value) == 0) cat("None\n")
+ else print(x$fixed)
+
+ if ( !is.null(x$par) ) {
+ cat("\nOptimised parameters:\n")
+ printCoefmat(x$par, digits = digits, ...)
+ }
+
+ cat("\nResidual standard error:",
+ format(signif(x$sigma, digits)), "on", rdf, "degrees of freedom\n")
+
+ cat("\nChi2 error levels in percent:\n")
+ x$errmin$err.min <- 100 * x$errmin$err.min
+ print(x$errmin, digits=digits,...)
+
+ printdistimes <- !is.null(x$distimes)
+ if(printdistimes){
+ cat("\nEstimated disappearance times:\n")
+ print(x$distimes, digits=digits,...)
+ }
+
+ printff <- !is.null(x$ff)
+ if(printff){
+ cat("\nEstimated formation fractions:\n")
+ print(data.frame(ff = x$ff), digits=digits,...)
+ }
+
+ printSFORB <- !is.null(x$SFORB)
+ if(printSFORB){
+ cat("\nEstimated Eigenvalues of SFORB model(s):\n")
+ print(x$SFORB, digits=digits,...)
+ }
+
+ printcor <- !is.null(x$cov.unscaled)
+ if (printcor){
+ Corr <- cov2cor(x$cov.unscaled)
+ rownames(Corr) <- colnames(Corr) <- rownames(x$par)
+ cat("\nParameter correlation:\n")
+ print(Corr, digits = digits, ...)
+ }
+
+ printdata <- !is.null(x$data)
+ if (printdata){
+ cat("\nData:\n")
+ print(format(x$data, digits = digits, scientific = FALSE,...), row.names = FALSE)
+ }
+
+ if(!is.null(x$additionalstats)){
+ cat("\nAdditional Stats:\n");
+ print(x$additionalstats, digits=digits, ...)
+ }
+
+ if(!(is.null(x$penalties) || 0 == dim(x$penalties)[[1]])){
+ cat("\nPenalties:\n");
+ print(x$penalties, digits=digits, row.names = FALSE, ...)
+ }
+
+ if ( !is.null(x$seed) ) {
+ cat("\nRandom Seed:", x$seed, "\n")
+ }
+ invisible(x)
+}

Contact - Imprint