diff options
author | Johannes Ranke <jranke@uni-bremen.de> | 2017-10-18 10:17:59 +0200 |
---|---|---|
committer | Johannes Ranke <jranke@uni-bremen.de> | 2017-10-18 10:17:59 +0200 |
commit | be6d42ef636e8e1c9fdcfa6f8738ee10e885d75b (patch) | |
tree | b676def6da66527c056e25fa5a127b97ca3a5560 |
Version 1.4v1.4
-rw-r--r-- | CakeCost.R | 348 | ||||
-rw-r--r-- | CakeInit.R | 39 | ||||
-rw-r--r-- | CakeIrlsFit.R | 147 | ||||
-rw-r--r-- | CakeIrlsPlot.R | 25 | ||||
-rw-r--r-- | CakeMcmcFit.R | 292 | ||||
-rw-r--r-- | CakeModel.R | 261 | ||||
-rw-r--r-- | CakeNullPlot.R | 127 | ||||
-rw-r--r-- | CakeOlsFit.R | 326 | ||||
-rw-r--r-- | CakeOlsPlot.R | 117 | ||||
-rw-r--r-- | CakePenalties.R | 56 | ||||
-rw-r--r-- | CakeSummary.R | 301 |
11 files changed, 2039 insertions, 0 deletions
diff --git a/CakeCost.R b/CakeCost.R new file mode 100644 index 0000000..1cc3981 --- /dev/null +++ b/CakeCost.R @@ -0,0 +1,348 @@ +## ----------------------------------------------------------------------------- +## The model cost and residuals +## ----------------------------------------------------------------------------- +# The CAKE R modules are based on mkin, +# Call to approx is only performed if there are multiple non NA values +# which should prevent most of the crashes - Rob Nelson (Tessella) +# +# Modifications developed by Tessella Plc for Syngenta, Copyright (C) 2011 Syngenta +# Authors: Rob Nelson, Richard Smith +# Tessella Project Reference: 6245 +# +# This program is free software: you can +# redistribute them and/or modify them under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <http://www.gnu.org/licenses/># +# +# $Id: CakeCost.R 216 2011-07-05 14:35:03Z nelr $ + +CakeCost <- function (model, obs, x = "time", y = NULL, err = NULL, + weight = "none", scaleVar = FALSE, cost = NULL, ...) { + + ## convert vector to matrix + if (is.vector(obs)) { + cn <- names(obs) + obs <- matrix(data = obs, nrow = 1) + colnames(obs) <- cn + } + if (is.vector(model)) { + cn <- names(model) + model <- matrix(data=model, nrow = 1) + colnames(model) <- cn + } + + ##============================================================================= + ## Observations + ##============================================================================= + + ## The position of independent variable(s) + ix <- 0 + if (! is.null(x)) { # mapping required... + ## For now multiple independent variables are not supported... + if (length(x) > 1) + stop ("multiple independent variables in 'obs' are not yet supported") + + if (! is.character(x)) + stop ("'x' should be the *name* of the column with the independent variable in 'obs' or NULL") + ix <- which(colnames(obs) %in% x) + if (length(ix) != length(x)) + stop(paste("Independent variable column not found in observations", x)) + } else ix <- NULL + + ## The position of weighing values + ierr <- 0 + if (! is.null(err)) { + if (! is.character(err)) + stop ("'err' should be the *name* of the column with the error estimates in obs or NULL") + ierr <- which(colnames(obs) == err) # only one + if (length(ierr) == 0) + stop(paste("Column with error estimates not found in observations", err)) + } + + ## The dependent variables + type <- 1 # data input type: type 2 is table format, type 1 is long format... + + if (!is.null(y)) { # it is in table format; first column are names of observed data... + + Names <- as.character(unique(obs[, 1])) # Names of data sets, all data should be model variables... + Ndat <- length(Names) # Number of data sets + ilist <- 1:Ndat + if (! is.character(y)) + stop ("'y' should be the *name* of the column with the values of the dependent variable in obs") + iy <- which(colnames(obs) == y) + if (length(iy) == 0) + stop(paste("Column with value of dependent variable not found in observations", y)) + type <- 2 + + } else { # it is a matrix, variable names are column names + Ndat <- NCOL(obs)-1 + Names <- colnames(obs) + ilist <- (1:NCOL(obs)) # column positions of the (dependent) observed variables + exclude <- ix # exclude columns that are not + if (ierr > 0) + exclude <- c(ix, ierr) # exclude columns that are not + if (length(exclude) > 0) + ilist <- ilist[-exclude] + } + + #================================ + # The model results + #================================ + + ModNames <- colnames(model) # Names of model variables + if (length(ix) > 1) { + ixMod <- NULL + + for ( i in 1:length(ix)) { + ix2 <- which(colnames(model) == x[i]) + if (length(ix2) == 0) + stop(paste("Cannot calculate cost: independent variable not found in model output", x[i])) + ixMod <- c(ixMod, ix2) + } + + xMod <- model[,ixMod] # Independent variable, model + } else if (length(ix) == 1) { + ixMod <- which(colnames(model) == x) + if (length(ixMod) == 0) + stop(paste("Cannot calculate cost: independent variable not found in model output", x)) + xMod <- model[,ixMod] # Independent variable, model + } + Residual <- NULL + CostVar <- NULL + + #================================ + # Compare model and data... + #================================ + xDat <- 0 + iDat <- 1:nrow(obs) + + for (i in ilist) { # for each observed variable ... + ii <- which(ModNames == Names[i]) + if (length(ii) == 0) stop(paste("observed variable not found in model output", Names[i])) + yMod <- model[,ii] + if (type == 2) { # table format + iDat <- which (obs[,1] == Names[i]) + if (length(ix) > 0) xDat <- obs[iDat, ix] + obsdat <- obs[iDat, iy] + } else { + if (length(ix) > 0) xDat <- obs[, 1] + obsdat <- obs[,i] + } + ii <- which(is.na(obsdat)) + if (length(ii) > 0) { + xDat <- xDat[-ii] + obsdat <- obsdat[-ii] + } + + # CAKE version - Added tests for multiple non-NA values + if (length(ix) > 0 && length(unique(xMod[!is.na(xMod)]))>1 && length(yMod[!is.na(yMod)])>1) + { + ModVar <- approx(xMod, yMod, xout = xDat)$y + } + else { + cat("CakeCost Warning: Only one valid point - using mean (yMod was", yMod, ")\n") + ModVar <- mean(yMod[!is.na(yMod)]) + obsdat <- mean(obsdat) + } + iex <- which(!is.na(ModVar)) + ModVar <- ModVar[iex] + obsdat <- obsdat[iex] + xDat <- xDat[iex] + if (ierr > 0) { + Err <- obs[iDat, ierr] + Err <- Err[iex] + } else { + if (weight == "std") + Err <- sd(obsdat) + else if (weight == "mean") + Err <- mean(abs(obsdat)) + else if (weight == "none") + Err <- 1 + else + stop("error: do not recognize 'weight'; should be one of 'none', 'std', 'mean'") + } + if (any(is.na(Err))) + stop(paste("error: cannot estimate weighing for observed variable: ", Names[i])) + if (min(Err) <= 0) + stop(paste("error: weighing for observed variable is 0 or negative:", Names[i])) + if (scaleVar) + Scale <- 1/length(obsdat) + else Scale <- 1 + Res <- (ModVar- obsdat) + res <- Res/Err + resScaled <- res*Scale + Residual <- rbind(Residual, + data.frame( + name = Names[i], + x = xDat, + obs = obsdat, + mod = ModVar, + weight = 1/Err, + res.unweighted = Res, + res = res)) + + CostVar <- rbind(CostVar, + data.frame( + name = Names[i], + scale = Scale, + N = length(Res), + SSR.unweighted = sum(Res^2), + SSR.unscaled = sum(res^2), + SSR = sum(resScaled^2))) + } # end loop over all observed variables + + ## SSR + Cost <- sum(CostVar$SSR * CostVar$scale) + + Lprob <- -sum(log(pmax(0, dnorm(Residual$mod, Residual$obs, Err)))) # avoid log of negative values + #Lprob <- -sum(log(pmax(.Machine$double.xmin, dnorm(Residual$mod, Residual$obs, Err)))) #avoid log(0) + + if (! is.null(cost)) { + Cost <- Cost + cost$model + CostVar <- rbind(CostVar, cost$var) + Residual <- rbind(Residual, cost$residuals) + Lprob <- Lprob + cost$minlogp + } + out <- list(model = Cost, cost = Cost, minlogp = Lprob, var = CostVar, residuals = Residual) + class(out) <- "modCost" + return(out) +} + +## ----------------------------------------------------------------------------- +## S3 methods of modCost +## ----------------------------------------------------------------------------- + +plot.modCost<- function(x, legpos="topleft", ...) { + nvar <- nrow(x$var) + + dots <- list(...) + + dots$xlab <- if(is.null(dots$xlab)) "x" else dots$xlab + dots$ylab <- if(is.null(dots$ylab)) "weighted residuals" else dots$ylab + DotsPch <- if(is.null(dots$pch)) (16:24) else dots$pch + dots$pch <- if(is.null(dots$pch)) (16:24)[x$residuals$name] else dots$pch[x$residuals$name] + DotsCol <- if(is.null(dots$col)) (1:nvar) else dots$col + dots$col <- if(is.null(dots$col)) (1:nvar)[x$residuals$name] else dots$col[x$residuals$name] + + do.call("plot", c(alist(x$residuals$x, x$residuals$res), dots)) + +# plot(x$residuals$x, x$residuals$res, xlab="x", ylab="weighted residuals", +# pch=c(16:24)[x$residuals$name],col=c(1:nvar)[x$residuals$name],...) + + if (! is.na(legpos)) + legend(legpos, legend = x$var$name, col = DotsCol, pch = DotsPch) +} + +## ----------------------------------------------------------------------------- +## Internal cost function for optimisers +## ----------------------------------------------------------------------------- +# Cost function. The returned structure must have $model +# Called from the middle of IrlsFit and OlsFit +# We need to preserve state between calls so make a closure +CakeInternalCostFunctions <- function(mkinmod, state.ini.optim, state.ini.optim.boxnames, + state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, + quiet, plot=FALSE, atol=1e-6){ + cost.old <- 1e+100 + calls <- 0 + out_predicted <- NA + + get.predicted <- function(){ out_predicted } + + get.best.cost <- function(){ cost.old } + reset.best.cost <- function() { cost.old<<-1e+100 } + + get.calls <- function(){ calls } + set.calls <- function(newcalls){ calls <<- newcalls } + + set.error<-function(err) { observed$err <<- err } + + cost <- function(P) { + assign("calls", calls + 1, inherits = TRUE) + print(P) + if (length(state.ini.optim) > 0) { + odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) + names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) + } + else odeini <- state.ini.fixed + odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], + parms.fixed) + # Ensure initial state is at time 0 + outtimes = unique(c(0,observed$time)) + out <- ode(y = odeini, times = outtimes, func = mkindiff, + parms = odeparms, atol=atol) + out_transformed <- data.frame(time = out[, "time"]) + for (var in names(mkinmod$map)) { + if (length(mkinmod$map[[var]]) == 1) { + out_transformed[var] <- out[, var] + } + else { + out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) + } + } + assign("out_predicted", out_transformed, inherits = TRUE) + mC <- CakeCost(out_transformed, observed, y = "value", + err = 'err', scaleVar = scaleVar) + mC$penalties <- CakePenalties(odeparms, out_transformed, observed) + mC$model <- mC$cost + mC$penalties; + if (mC$model < cost.old) { + if (!quiet) + cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, + "\n") + if (plot) { + outtimes_plot = seq(min(observed$time), max(observed$time), + length.out = 100) + out_plot <- ode(y = odeini, times = outtimes_plot, + func = mkindiff, parms = odeparms, atol=atol) + out_transformed_plot <- data.frame(time = out_plot[, + "time"]) + for (var in names(mkinmod$map)) { + if (length(mkinmod$map[[var]]) == 1) { + out_transformed_plot[var] <- out_plot[, var] + } + else { + out_transformed_plot[var] <- rowSums(out_plot[, + mkinmod$map[[var]]]) + } + } + plot(0, type = "n", xlim = range(observed$time), + ylim = range(observed$value, na.rm = TRUE), + xlab = "Time", ylab = "Observed") + col_obs <- pch_obs <- 1:length(obs_vars) + names(col_obs) <- names(pch_obs) <- obs_vars + for (obs_var in obs_vars) { + points(subset(observed, name == obs_var, c(time, + value)), pch = pch_obs[obs_var], col = col_obs[obs_var]) + } + matlines(out_transformed_plot$time, out_transformed_plot[-1]) + legend("topright", inset = c(0.05, 0.05), legend = obs_vars, + col = col_obs, pch = pch_obs, lty = 1:length(pch_obs)) + } + assign("cost.old", mC$model, inherits = TRUE) + } + # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost + if(mC$penalties > 0){ + #cat("Penalty adjustment: ",mC$penalties) + mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res) + } + + return(mC) + } + + + + list(cost=cost, + get.predicted=get.predicted, + get.calls=get.calls, set.calls=set.calls, + get.best.cost=get.best.cost, reset.best.cost=reset.best.cost, + set.error=set.error + ) +}
\ No newline at end of file diff --git a/CakeInit.R b/CakeInit.R new file mode 100644 index 0000000..3e609d5 --- /dev/null +++ b/CakeInit.R @@ -0,0 +1,39 @@ +#$Id: CakeInit.R 221 2011-11-01 14:12:45Z nelr $ + +# Purpose: Load the modules required by CAKE +# Call with chdir=TRUE so it can load our source from the current directory +# Tessella Project Reference: 6245 + +# Copyright (C) 2011 Syngenta + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +library(mkin) +source("CakeModel.R") +source("CakeOlsFit.R") +source("CakeIrlsFit.R") +source("CakeOlsPlot.R") +source("CakeIrlsPlot.R") +source("CakeNullPlot.R") +source("CakeCost.R") +source("CakePenalties.R") +source("CakeMcmcFit.R") +source("CakeSummary.R") + +options(width=1000) +options(error=recover) +options(warn=-1) + +# When running from C#, this must match the C# CAKE version +CAKE.version<-"1.4" diff --git a/CakeIrlsFit.R b/CakeIrlsFit.R new file mode 100644 index 0000000..bf20572 --- /dev/null +++ b/CakeIrlsFit.R @@ -0,0 +1,147 @@ +#$Id: CakeIrlsFit.R 216 2011-07-05 14:35:03Z nelr $ +# +# The CAKE R modules are based on mkin +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Authors: Rob Nelson, Richard Smith +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)), + state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0, + upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1], + plot = FALSE, quiet = FALSE, err = NULL, weight = "none", + scaleVar = FALSE, atol=1e-6, control=list(),...) +{ + +### This is a modification based on the "mkinfit" function. +### version 0.1 July 20 +### +# This version has been modified to expect SFO parameterised as k and flow fractions +# Based on code in IRLSkinfit + NAind <-which(is.na(observed$value)) + mod_vars <- names(mkinmod$diffs) + observed <- subset(observed, name %in% names(mkinmod$map)) + ERR <- rep(1,nrow(observed)) + observed <- cbind(observed,err=ERR) + obs_vars = unique(as.character(observed$name)) + if (is.null(names(parms.ini))) + names(parms.ini) <- mkinmod$parms + mkindiff <- function(t, state, parms) { + time <- t + diffs <- vector() + for (box in mod_vars) { + diffname <- paste("d", box, sep = "_") + diffs[diffname] <- with(as.list(c(time, state, parms)), + eval(parse(text = mkinmod$diffs[[box]]))) + } + return(list(c(diffs))) + } + if (is.null(names(state.ini))) + names(state.ini) <- mod_vars + parms.fixed <- parms.ini[fixed_parms] + optim_parms <- setdiff(names(parms.ini), fixed_parms) + parms.optim <- parms.ini[optim_parms] + state.ini.fixed <- state.ini[fixed_initials] + optim_initials <- setdiff(names(state.ini), fixed_initials) + state.ini.optim <- state.ini[optim_initials] + state.ini.optim.boxnames <- names(state.ini.optim) + if (length(state.ini.optim) > 0) { + names(state.ini.optim) <- paste(names(state.ini.optim), + "0", sep = "_") + } + + costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames, + state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol) + + ############### Iteratively Reweighted Least Squares############# + ## Start with no weighting + fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, + upper = upper,control=control,...) + + if(length(control)==0) + { + irls.control <- list(maxIter=10,tol=1e-05) + control <- list(irls.control=irls.control) + }else{ + if(is.null(control$irls.control)) + { + irls.control <- list(maxIter=10,tol=1e-05) + control <- list(irls.control=irls.control) + } + } + irls.control <- control$irls.control + maxIter <- irls.control$maxIter + tol <- irls.control$tol + #### + niter <- 1 + ## insure one IRLS iteration + diffsigma <- 100 + olderr <- rep(1,length(mod_vars)) + while(diffsigma>tol & niter<=maxIter) + { + err <- sqrt(fit$var_ms_unweighted) + ERR <- err[as.character(observed$name)] + costFunctions$set.error(ERR) + diffsigma <- sum((err-olderr)^2) + cat("IRLS iteration at",niter, "; Diff in error variance ", diffsigma,"\n") + olderr <- err + fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, control=control, ...) + niter <- niter+1 + + ### If not converged, reweight and fit + } + + ########################################### + fit$mkindiff <- mkindiff + fit$map <- mkinmod$map + fit$diffs <- mkinmod$diffs + + out_predicted <- costFunctions$get.predicted() + + # mkin_long_to_wide does not handle ragged data + fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") + names(fit$observed) <- c("time", as.vector(unique(observed$name))) + + predicted_long <- mkin_wide_to_long(out_predicted, time = "time") + fit$predicted <- out_predicted + fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) + fit$start$type = c(rep("state", length(state.ini.optim)), + rep("deparm", length(parms.optim))) + fit$start$lower <- lower + fit$start$upper <- upper + fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) + fit$fixed$type = c(rep("state", length(state.ini.fixed)), + rep("deparm", length(parms.fixed))) + + fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) + parms.all = c(fit$par, parms.fixed) + fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed) + fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), + DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) + for (obs_var in obs_vars) { + type = names(mkinmod$map[[obs_var]])[1] + fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all) + } + + data <- merge(observed, predicted_long, by = c("time", "name")) + + + names(data) <- c("time", "variable", "observed","err-var", "predicted") + data$residual <- data$observed - data$predicted + data$variable <- ordered(data$variable, levels = obs_vars) + fit$data <- data[order(data$variable, data$time), ] + class(fit) <- c("CakeFit", "mkinfit", "modFit") + return(fit) +} diff --git a/CakeIrlsPlot.R b/CakeIrlsPlot.R new file mode 100644 index 0000000..2de8910 --- /dev/null +++ b/CakeIrlsPlot.R @@ -0,0 +1,25 @@ +#$Id: CakeIrlsPlot.R 216 2011-07-05 14:35:03Z nelr $ +# Generates fitted curves so the GUI can plot them +# Author: Rob Nelson (Tessella) +# Tessella Project Reference: 6245 + +# Copyright (C) 2011 Syngenta +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +CakeIrlsPlot <- function(fit, xlim = range(fit$data$time), ...) +{ + CakeOlsPlot(fit, xlim, ...) +} + diff --git a/CakeMcmcFit.R b/CakeMcmcFit.R new file mode 100644 index 0000000..209f904 --- /dev/null +++ b/CakeMcmcFit.R @@ -0,0 +1,292 @@ +# $Id: CakeMcmcFit.R 216 2011-07-05 14:35:03Z nelr $ +# The CAKE R modules are based on mkin, +# Based on mcmckinfit as modified by Bayer +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Author: Rob Nelson +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +CakeMcmcFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)), + state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0, + upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1], + plot = FALSE, quiet = FALSE, err = NULL, weight = "none", + scaleVar = FALSE, commonsigma=FALSE,jump = NULL,prior = NULL, + wvar0=0.1,niter = 1000, + outputlength = niter, burninlength = 0, updatecov = niter, + ntrydr = 1, drscale = NULL, verbose = TRUE,fitstart=TRUE,seed=NULL,atol=1e-6,...) +{ + + NAind <-which(is.na(observed$value)) + mod_vars <- names(mkinmod$diffs) + observed <- subset(observed, name %in% names(mkinmod$map)) + ERR <- rep(1,nrow(observed)) + observed <- cbind(observed,err=ERR) + obs_vars = unique(as.character(observed$name)) + if (is.null(names(parms.ini))) + names(parms.ini) <- mkinmod$parms + mkindiff <- function(t, state, parms) { + time <- t + diffs <- vector() + for (box in mod_vars) { + diffname <- paste("d", box, sep = "_") + diffs[diffname] <- with(as.list(c(time, state, parms)), + eval(parse(text = mkinmod$diffs[[box]]))) + } + return(list(c(diffs))) + } + if (is.null(names(state.ini))) + names(state.ini) <- mod_vars + parms.fixed <- parms.ini[fixed_parms] + optim_parms <- setdiff(names(parms.ini), fixed_parms) + parms.optim <- parms.ini[optim_parms] + state.ini.fixed <- state.ini[fixed_initials] + optim_initials <- setdiff(names(state.ini), fixed_initials) + state.ini.optim <- state.ini[optim_initials] + state.ini.optim.boxnames <- names(state.ini.optim) + if (length(state.ini.optim) > 0) { + names(state.ini.optim) <- paste(names(state.ini.optim), + "0", sep = "_") + } + + costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames, + state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol) + bestIteration <- -1; + costWithStatus <- function(P, ...){ + r <- costFunctions$cost(P) + if(r$cost == costFunctions$get.best.cost()) { + bestIteration<<-costFunctions$get.calls(); + cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n') + } + cat("MCMC Call no.", costFunctions$get.calls(), '\n') + return(r) + } + + # Set the seed + if ( is.null(seed) ) { + # No seed so create a random one so there is something to report + seed = runif(1,0,2^31-1) + } + seed = as.integer(seed) + set.seed(seed) + + if(fitstart==TRUE) + { + ## ############# Get Initial Paramtervalues ############# + ## Start with no weighting + fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, + upper = upper,...) + + if(commonsigma==TRUE){ + #observed$err <- rep(1,nrow(observed)) + cov0 <- summary(fit)$cov.scaled*2.4^2/length(fit$par) + costFunctions$set.calls(0); costFunctions$reset.best.cost() + res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) + #res <- modMCMC(cost,fit$par,lower=lower,upper=upper,niter=niter) + }else{ + ## ############## One reweighted estimation ############################ + ## Estimate the error variance(sd) + tmpres <- fit$residuals + oldERR <- observed$err + err <- rep(NA,length(mod_vars)) + for(i in 1:length(mod_vars)) + { + box <- mod_vars[i] + ind <- which(names(tmpres)==box) + tmp <- tmpres[ind] + err[i] <- sd(tmp) + } + names(err) <- mod_vars + ERR <- err[as.character(observed$name)] + observed$err <-ERR + costFunctions$set.error(ERR) + olderr <- rep(1,length(mod_vars)) + diffsigma <- sum((err-olderr)^2) + ## At least do one iteration step to get a weighted LS + fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...) + ## Use this as the Input for MCMC algorithm + ## ########################## + fs <- summary(fit) + cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par) + var0 <- fit$var_ms_unweighted + costFunctions$set.calls(0); costFunctions$reset.best.cost() + res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) + + } + }else { + stop('fitstart=FALSE code removed until needed') + } + # Construct the fit object to return + fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) + fit$start$type = c(rep("state", length(state.ini.optim)), + rep("deparm", length(parms.optim))) + fit$start$lower <- lower + fit$start$upper <- upper + fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) + fit$fixed$type = c(rep("state", length(state.ini.fixed)), + rep("deparm", length(parms.fixed))) + + fit$mkindiff <- mkindiff + fit$map <- mkinmod$map + fit$diffs <- mkinmod$diffs + + # mkin_long_to_wide does not handle ragged data + fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") + #names(fit$observed) <- c("time", as.vector(unique(observed$name))) + fns <- function(str) { + fields <- strsplit(str, "\\.")[[1]] + if (fields[1] == "value") { + return(fields[2]) + } + else { + return(str) + } + } + names(fit$observed) <- sapply(names(fit$observed), fns) + + # Replace mean from modFit with mean from modMCMC + fnm <- function(x) mean(res$pars[,x]) + fit$par <- sapply(dimnames(res$pars)[[2]],fnm) + fit$bestpar <- res$bestpar + fit$costfn <- costWithStatus + + # Disappearence times + parms.all <- c(fit$par, parms.fixed) + obs_vars = unique(as.character(observed$name)) + fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), + DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) + for (obs_var in obs_vars) { + type = names(mkinmod$map[[obs_var]])[1] + fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all) + } + + + # Fits to data + odeini <- state.ini + odeparms <- parms.all + # If this step modelled the start amount, include it. Lookup in vectors returns NA not null if missing + if(!is.na(odeparms["Parent_0"])) { odeini['Parent'] = odeparms["Parent_0"] } + + # Solve the system + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } + + # Ensure initial state is at time 0 + obstimes <- unique(c(0,observed$time)) + + out <- ode( + y = odeini, + times = obstimes, + func = mkindiff, + parms = odeparms, + ) + + # Output transformation for models with unobserved compartments like SFORB + out_transformed <- data.frame(time = out[,"time"]) + for (var in names(mkinmod$map)) { + if(length(mkinmod$map[[var]]) == 1) { + out_transformed[var] <- out[, var] + } else { + out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) + } + } + fit$predicted <- out_transformed + fit$penalties <- CakePenaltiesLong(odeparms, out_transformed, observed) + + predicted_long <- mkin_wide_to_long(out_transformed,time="time") + obs_vars = unique(as.character(observed$name)) + fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) + + data<-observed + data$err<-rep(NA,length(data$time)) + data<-merge(data, predicted_long, by=c("time","name")) + names(data)<-c("time", "variable", "observed","err-var", "predicted") + data$residual<-data$observed-data$predicted + data$variable <- ordered(data$variable, levels = obs_vars) + fit$data <- data[order(data$variable, data$time), ] + + sq <- data$residual * data$residual + fit$ssr <- sum(sq) + + fit$seed = seed + + fit$res <- res + class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit") + return(fit) +} + + +# Summarise a fit +summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, ff = TRUE, cov = FALSE,...) { + param <- object$par + pnames <- names(param) + p <- length(param) + #covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance + mcmc <- object$res + covar <- cov(mcmc$pars) + + rdf <- object$df.residual + + message <- "ok" + rownames(covar) <- colnames(covar) <-pnames + + #se <- sqrt(diag(covar) * resvar) + fnse <- function(x) sd(mcmc$pars[,x]) #/sqrt(length(mcmc$pars[,x])) + se <- sapply(dimnames(mcmc$pars)[[2]],fnse) + + tval <- param / se + + if (!all(object$start$lower >=0)) { + message <- "Note that the one-sided t-test may not be appropriate if + parameter values below zero are possible." + warning(message) + } else message <- "ok" + + # Parameter fits + alpha <- 0.05 + lci <- param + qt(alpha/2,rdf)*se + uci <- param + qt(1-alpha/2,rdf)*se + param <- cbind(param, se, tval, pt(tval, rdf, lower.tail = FALSE), lci, uci) + dimnames(param) <- list(pnames, c("Estimate", "Std. Error", "t value", "Pr(>t)", "Lower CI", "Upper CI")) + + # Residuals from mean of MCMC fit + resvar <- object$ssr/ rdf + modVariance <- object$ssr / length(object$data$residual) + + ans <- list(residuals = object$data$residuals, + residualVariance = resvar, + sigma = sqrt(resvar), + modVariance = modVariance, + df = c(p, rdf), cov.unscaled = covar, + cov.scaled = covar * resvar, + info = object$info, niter = object$iterations, + stopmess = message, + par = param) + + ans$diffs <- object$diffs + ans$data <- object$data + ans$additionalstats = CakeAdditionalStats(object$data) + ans$seed <- object$seed + + ans$start <- object$start + ans$fixed <- object$fixed + ans$errmin <- object$errmin + if(distimes) ans$distimes <- object$distimes + if(ff) ans$ff <- object$ff + if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB + class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit") + return(ans) +} diff --git a/CakeModel.R b/CakeModel.R new file mode 100644 index 0000000..a93d7af --- /dev/null +++ b/CakeModel.R @@ -0,0 +1,261 @@ +# Was Id: mkinmod.R 71 2010-09-12 01:13:36Z jranke +# $Id: CakeModel.R 216 2011-07-05 14:35:03Z nelr $ + +# The CAKE R modules are based on mkin +# Portions Johannes Ranke, 2010 +# Contact: mkin-devel@lists.berlios.de + +# This version has been modified to parameterise SFO as k and flow fractions +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +CakeModel <- function(...) +{ + spec <- list(...) + obs_vars <- names(spec) + + # The returned model will be a list of character vectors, containing + # differential equations, parameter names and a mapping from model variables + # to observed variables. If possible, a matrix representation of the + # differential equations is included + parms <- vector() + diffs <- vector() + map <- list() + if(spec[[1]]$type %in% c("FOMC", "DFOP", "HS")) { + mat = FALSE + if(!is.null(spec[[1]]$to)) { + message <- paste("Only constant formation fractions over time are implemented.", + "Depending on the reason for the time dependence of degradation, this may be unrealistic", + sep="\n") + warning(message) + } else message <- "ok" + } else mat = TRUE + + mat = FALSE # XYZZY Not implemented yet assuming it should be + nlt = list() + + # Establish list of differential equations + for (varname in obs_vars) + { + if(is.null(spec[[varname]]$type)) stop( + "Every argument to mkinmod must be a list containing a type component") + if(!spec[[varname]]$type %in% c("SFO", "FOMC", "DFOP", "HS", "SFORB")) stop( + "Available types are SFO, FOMC, DFOP, HS and SFORB only") + new_parms <- vector() + + # New (sub)compartments (boxes) needed for the model type + new_boxes <- switch(spec[[varname]]$type, + SFO = varname, + FOMC = varname, + DFOP = varname, + HS = varname, + SFORB = paste(varname, c("free", "bound"), sep="_") + ) + map[[varname]] <- new_boxes + names(map[[varname]]) <- rep(spec[[varname]]$type, length(new_boxes)) + + # Start a new differential equation for each new box + new_diffs <- paste("d_", new_boxes, " =", sep="") + + # Turn on sink if not specified otherwise + if(is.null(spec[[varname]]$sink)) spec[[varname]]$sink <- TRUE + + #@@@ADD SFO k HERE !!!!!!!!!!!!! + # Construct and add SFO term and add SFO parameters if needed + if(spec[[varname]]$type == "SFO") { + # From p. 53 of the FOCUS kinetics report + k_term <- paste("k", new_boxes[[1]], sep="_") + nonlinear_term <- paste(k_term, new_boxes[[1]], sep=" * ") + spec[[varname]]$nlt<-nonlinear_term + new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) + new_parms <- k_term + ff <- vector() + } + + # Construct and add FOMC term and add FOMC parameters if needed + if(spec[[varname]]$type == "FOMC") { + if(match(varname, obs_vars) != 1) { + stop("Type FOMC is only possible for the first compartment, which is assumed to be the source compartment") + } +# if(spec[[varname]]$sink == FALSE) { +# stop("Turning off the sink for the FOMC model is not implemented") +# } + # From p. 53 of the FOCUS kinetics report + nonlinear_term <- paste("(alpha/beta) * ((time/beta) + 1)^-1 *", new_boxes[[1]]) + spec[[varname]]$nlt<-nonlinear_term + new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) + new_parms <- c("alpha", "beta") + ff <- vector() + } + + # Construct and add DFOP term and add DFOP parameters if needed + if(spec[[varname]]$type == "DFOP") { + if(match(varname, obs_vars) != 1) { + stop("Type DFOP is only possible for the first compartment, which is assumed to be the source compartment") + } +# if(spec[[varname]]$sink == FALSE) { +# stop("Turning off the sink for the DFOP model is not implemented") +# } + # From p. 57 of the FOCUS kinetics report + nonlinear_term <- paste("((k1 * g * exp(-k1 * time) + k2 * (1 - g) * exp(-k2 * time)) / (g * exp(-k1 * time) + (1 - g) * exp(-k2 * time))) *", new_boxes[[1]]) + spec[[varname]]$nlt<-nonlinear_term + new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) + new_parms <- c("k1", "k2", "g") + ff <- vector() + } + + # Construct and add HS term and add HS parameters if needed + if(spec[[varname]]$type == "HS") { + if(match(varname, obs_vars) != 1) { + stop("Type HS is only possible for the first compartment, which is assumed to be the source compartment") + } +# if(spec[[varname]]$sink == FALSE) { +# stop("Turning off the sink for the HS model is not implemented") +# } + # From p. 55 of the FOCUS kinetics report +# nonlinear_term <- paste("ifelse(time <= tb, k1, k2)", "*", new_boxes[[1]]) + nonlinear_term <- paste("((k1 - k2) / ( 1 + exp( 10*(time - tb) ) ) + k2)", "*", new_boxes[[1]]) + spec[[varname]]$nlt<-nonlinear_term + new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) + new_parms <- c("k1", "k2", "tb") + ff <- vector() + } + + # Construct terms for transfer to sink and add if appropriate + + #@@@@ REMOVE THIS ????? + if(spec[[varname]]$sink) { + # Add first-order sink term to first (or only) box for SFO and SFORB +# if(spec[[varname]]$type %in% c("SFO", "SFORB")) { + if(spec[[varname]]$type == "SFORB") { + k_compound_sink <- paste("k", new_boxes[[1]], "sink", sep="_") + sink_term <- paste("-", k_compound_sink, "*", new_boxes[[1]]) + new_diffs[[1]] <- paste(new_diffs[[1]], sink_term) + new_parms <- k_compound_sink + } + } + + # Add reversible binding if appropriate + if(spec[[varname]]$type == "SFORB") { + k_free_bound <- paste("k", varname, "free", "bound", sep="_") + k_bound_free <- paste("k", varname, "bound", "free", sep="_") + reversible_binding_terms <- c( + paste("-", k_free_bound, "*", new_boxes[[1]], "+", k_bound_free, "*", new_boxes[[2]]), + paste("+", k_free_bound, "*", new_boxes[[1]], "-", k_bound_free, "*", new_boxes[[2]])) + new_diffs <- paste(new_diffs, reversible_binding_terms) + new_parms <- c(new_parms, k_free_bound, k_bound_free) + } + + # Add observed variable to model + parms <- c(parms, new_parms) + names(new_diffs) <- new_boxes + diffs <- c(diffs, new_diffs) + } + # Transfer between compartments + for (varname in obs_vars) { + to <- spec[[varname]]$to + if(!is.null(to)) { + origin_box <- switch(spec[[varname]]$type, + SFO = varname, + FOMC = varname, + DFOP = varname, + HS = varname, + SFORB = paste(varname, "free", sep="_")) + fraction_left <- NULL + for (target in to) { + target_box <- switch(spec[[target]]$type, + SFO = target, + SFORB = paste(target, "free", sep="_")) + # SFO is no longer special + #if(spec[[varname]]$type %in% c("SFO", "SFORB")) { + if(spec[[varname]]$type == "SFORB") { + k_from_to <- paste("k", origin_box, target_box, sep="_") + diffs[[origin_box]] <- paste(diffs[[origin_box]], "-", + k_from_to, "*", origin_box) + diffs[[target_box]] <- paste(diffs[[target_box]], "+", + k_from_to, "*", origin_box) + parms <- c(parms, k_from_to) + } + # Handle SFO like the others +# if(spec[[varname]]$type %in% c("FOMC", "DFOP", "HS")) { + if(spec[[varname]]$type %in% c("FOMC", "DFOP", "HS", "SFO")) { + if ( length(to)==1 && !spec[[varname]]$sink ) { + # There is only one output, so no need for any flow fractions. Just add the whole flow from the parent + diffs[[target_box]] <- paste(diffs[[target_box]], "+", spec[[varname]]$nlt) + } else { + fraction_to_target = paste("f", varname, "to", target, sep="_") + fraction_not_to_target = paste("(1 - ", fraction_to_target, ")", sep="") + if(is.null(fraction_left)) { + fraction_really_to_target = fraction_to_target + fraction_left = fraction_not_to_target + } else { + # If this is the last output and there is no sink, it gets what's left + if ( target==tail(to,1) && !spec[[varname]]$sink ) { + fraction_really_to_target = fraction_left + } else { +# (1-fa)fb version +# fraction_really_to_target = paste(fraction_left, " * ", fraction_to_target, sep="") +# fraction_left = paste(fraction_left, " * ", fraction_not_to_target, sep="") +# fb version + fraction_really_to_target = fraction_to_target + fraction_left = paste(fraction_left, " - ", fraction_to_target, sep="") + } + } + ff[target_box] = fraction_really_to_target + diffs[[target_box]] <- paste(diffs[[target_box]], "+", ff[target_box], "*", spec[[varname]]$nlt) + # Add the flow fraction parameter (if it exists) + if ( target!=tail(to,1) || spec[[varname]]$sink ) { + parms <- c(parms, fraction_to_target) + } + } + } + } + } + } + model <- list(diffs = diffs, parms = parms, map = map) + + # Create coefficient matrix if appropriate + if (mat) { + boxes <- names(diffs) + n <- length(boxes) + m <- matrix(nrow=n, ncol=n, dimnames=list(boxes, boxes)) + for (from in boxes) { + for (to in boxes) { + if (from == to) { + #@@@@ OMIT NEXT LINE? !!!!! + k.candidate = paste("k", from, c(boxes, "sink"), sep="_") + k.candidate = sub("free.*bound", "free_bound", k.candidate) + k.candidate = sub("bound.*free", "bound_free", k.candidate) + k.effective = intersect(model$parms, k.candidate) + m[from,to] = ifelse(length(k.effective) > 0, + paste("-", k.effective, collapse = " "), "0") + } else { + k.candidate = paste("k", from, to, sep="_") + k.candidate = sub("free.*bound", "free_bound", k.candidate) + k.candidate = sub("bound.*free", "bound_free", k.candidate) + k.effective = intersect(model$parms, k.candidate) + m[to, from] = ifelse(length(k.effective) > 0, + k.effective, "0") + } + } + } + model$coefmat <- m + } + + if (exists("ff")) model$ff = ff + class(model) <- "mkinmod" + invisible(model) +} diff --git a/CakeNullPlot.R b/CakeNullPlot.R new file mode 100644 index 0000000..3eacd67 --- /dev/null +++ b/CakeNullPlot.R @@ -0,0 +1,127 @@ +#$Id: CakeNullPlot.R 216 2011-07-05 14:35:03Z nelr $ +# Generates model curves so the GUI can plot them. Used when all params are fixed so there was no fit +# The CAKE R modules are based on mkin +# Based on code in IRLSkinfit +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Author: Rob Nelson (Tessella) +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +CakeNullPlot <- function(model, parms.ini, state.ini, observed, xlim = range(observed$time), digits = max(3, getOption("digits") - 3), ...) +{ + # Print the fixed values + fixed <- data.frame(value = c(state.ini, parms.ini)) + fixed$type = c(rep("state", length(state.ini)), rep("deparm", length(parms.ini))) + cat("\nFixed parameter values:\n") + print(fixed) + + # Print disappearence times + obs_vars = unique(as.character(observed$name)) + distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), + DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) + for (obs_var in obs_vars) { + type = names(model$map[[obs_var]])[1] + if(exists("DT50")) distimes[obs_var, ] = c(DT50, DT90) + distimes[obs_var, ] = CakeDT(type,obs_var,parms.ini) + } + cat("\nEstimated disappearance times:\n") + print(distimes, digits=digits,...) + + # Plot the model + + outtimes <- seq(0, xlim[2], length.out=101) + + odeini <- state.ini + odenames <- model$parms + odeparms <- params.ini + + # Solve the system + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } + + mod_vars <- names(model$diffs) + mkindiff <- function(t, state, parms) { + time <- t + diffs <- vector() + for (box in mod_vars) { + diffname <- paste("d", box, sep = "_") + diffs[diffname] <- with(as.list(c(time, state, parms)), + eval(parse(text = model$diffs[[box]]))) + } + return(list(c(diffs))) + } + + # Ensure initial state is at time 0 + obstimes <- unique(c(0,observed$time)) + out <- ode( + y = odeini, + times = obstimes, + func = mkindiff, + parms = odeparms, + ) + + # Output transformation for models with unobserved compartments like SFORB + out_transformed <- data.frame(time = out[,"time"]) + for (var in names(model$map)) { + if(length(model$map[[var]]) == 1) { + out_transformed[var] <- out[, var] + } else { + out_transformed[var] <- rowSums(out[, model$map[[var]]]) + } + } + + predicted_long <- mkin_wide_to_long(out_transformed,time="time") + # Calculate chi2 error levels according to FOCUS (2006) + errmin<-CakeChi2(observed, predicted_long, obs_vars, c(), c()) + cat("\nChi2 error levels in percent:\n") + errmin$err.min <- 100 * errmin$err.min + print(errmin, digits=digits,...) + + + # Fit to data + data<-observed + data$err<-rep(NA,length(data$time)) + data<-merge(data, predicted_long, by=c("time","name")) + names(data)<-c("time", "variable", "observed","err-var", "predicted") + data$residual<-data$observed-data$predicted + data<-data[order(data$variable,data$time),] + cat("\nData:\n") + print(format(data, digits = digits, scientific = FALSE,...), row.names = FALSE) + + + out <- ode( + y = odeini, + times = outtimes, + func = mkindiff, + parms = odeparms, + ) + + # Output transformation for models with unobserved compartments like SFORB + out_transformed <- data.frame(time = out[,"time"]) + for (var in names(model$map)) { + if(length(model$map[[var]]) == 1) { + out_transformed[var] <- out[, var] + } else { + out_transformed[var] <- rowSums(out[, model$map[[var]]]) + } + } + + cat("\n<PLOT MODEL START>\n") + print(out_transformed) + cat("<PLOT MODEL END>\n") +} + diff --git a/CakeOlsFit.R b/CakeOlsFit.R new file mode 100644 index 0000000..75ac471 --- /dev/null +++ b/CakeOlsFit.R @@ -0,0 +1,326 @@ +# Originally: mkinfit.R 87 2010-12-09 07:31:59Z jranke $ + +# Based on code in mkinfit +# Portions Johannes Ranke 2010 +# Contact: mkin-devel@lists.berlios.de +# The summary function is an adapted and extended version of summary.modFit +# from the FME package, v 1.1 by Soetart and Petzoldt, which was in turn +# inspired by summary.nls.lm + +#$Id: CakeOlsFit.R 216 2011-07-05 14:35:03Z nelr $ +# This version has been modified to expect SFO parameterised as k and flow fractions +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Authors: Rob Nelson, Richard Smith +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +CakeOlsFit <- function(mkinmod, observed, + parms.ini = rep(0.1, length(mkinmod$parms)), + state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), + lower = 0, upper = Inf, + fixed_parms = NULL, + fixed_initials = names(mkinmod$diffs)[-1], + eigen = TRUE, + plot = FALSE, quiet = FALSE, + err = NULL, weight = "none", scaleVar = FALSE, + atol = 1e-6, + ...) +{ + mod_vars <- names(mkinmod$diffs) + # Subset dataframe with mapped (modelled) variables + observed <- subset(observed, name %in% names(mkinmod$map)) + # Get names of observed variables + obs_vars = unique(as.character(observed$name)) + + # Name the parameters if they are not named yet + if(is.null(names(parms.ini))) names(parms.ini) <- mkinmod$parms + + # Name the inital parameter values if they are not named yet + if(is.null(names(state.ini))) names(state.ini) <- mod_vars + + # Parameters to be optimised + parms.fixed <- parms.ini[fixed_parms] + optim_parms <- setdiff(names(parms.ini), fixed_parms) + parms.optim <- parms.ini[optim_parms] + + state.ini.fixed <- state.ini[fixed_initials] + optim_initials <- setdiff(names(state.ini), fixed_initials) + state.ini.optim <- state.ini[optim_initials] + state.ini.optim.boxnames <- names(state.ini.optim) + if(length(state.ini.optim) > 0) { + names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") + } + + # Decide if the solution of the model can be based on a simple analytical + # formula, the spectral decomposition of the matrix (fundamental system) + # or a numeric ode solver from the deSolve package + if (length(mkinmod$map) == 1) { + solution = "analytical" + } else { + if (is.matrix(mkinmod$coefmat) & eigen) solution = "eigen" + else solution = "deSolve" + } + + # Create a function calculating the differentials specified by the model + # if necessary + if(solution == "deSolve") { + mkindiff <- function(t, state, parms) { + time <- t + diffs <- vector() + for (box in mod_vars) + { + diffname <- paste("d", box, sep="_") + diffs[diffname] <- with(as.list(c(time,state, parms)), + eval(parse(text=mkinmod$diffs[[box]]))) + } + return(list(c(diffs))) + } + } + + cost.old <- 1e100 + calls <- 0 + out_predicted <- NA + # Define the model cost function + cost <- function(P) + { + assign("calls", calls+1, inherits=TRUE) + if(length(state.ini.optim) > 0) { + odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) + names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) + } else odeini <- state.ini.fixed + + odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed) + + # Ensure initial state is at time 0 + outtimes = unique(c(0,observed$time)) + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } + + # Solve the system + if (solution == "analytical") { + parent.type = names(mkinmod$map[[1]])[1] + parent.name = names(mkinmod$diffs)[[1]] + o <- switch(parent.type, + SFO = SFO.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, sep="_"))), +# evalparse("k")), +# evalparse(paste("k", parent.name, "sink", sep="_"))), + FOMC = FOMC.solution(outtimes, + evalparse(parent.name), + evalparse("alpha"), evalparse("beta")), + DFOP = DFOP.solution(outtimes, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("g")), + HS = HS.solution(outtimes, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("tb")), + SFORB = SFORB.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, "bound", sep="_")), + evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), + evalparse(paste("k", parent.name, "sink", sep="_"))) + ) + out <- cbind(outtimes, o) + dimnames(out) <- list(outtimes, c("time", sub("_free", "", parent.name))) + } + if (solution == "eigen") { + coefmat.num <- matrix(sapply(as.vector(mkinmod$coefmat), evalparse), + nrow = length(mod_vars)) + e <- eigen(coefmat.num) + c <- solve(e$vectors, odeini) + f.out <- function(t) { + e$vectors %*% diag(exp(e$values * t), nrow=length(mod_vars)) %*% c + } + o <- matrix(mapply(f.out, outtimes), + nrow = length(mod_vars), ncol = length(outtimes)) + dimnames(o) <- list(mod_vars, outtimes) + out <- cbind(time = outtimes, t(o)) + } + if (solution == "deSolve") + { + out <- ode( + y = odeini, + times = outtimes, + func = mkindiff, + parms = odeparms, + atol = atol + ) + } + + # Output transformation for models with unobserved compartments like SFORB + out_transformed <- data.frame(time = out[,"time"]) + for (var in names(mkinmod$map)) { + if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { + out_transformed[var] <- out[, var] + } else { + out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) + } + } + assign("out_predicted", out_transformed, inherits=TRUE) + + mC <- CakeCost(out_transformed, observed, y = "value", + err = err, weight = weight, scaleVar = scaleVar) + mC$penalties <- CakePenalties(odeparms, out_transformed, observed) + mC$model <- mC$cost + mC$penalties; + if (mC$model < cost.old) { + if (!quiet) + cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, + "\n") + + # Plot the data and current model output if requested + if(plot) { + outtimes_plot = seq(min(observed$time), max(observed$time), length.out=100) + if (solution == "analytical") { + o_plot <- switch(parent.type, + SFO = SFO.solution(outtimes_plot, + evalparse(parent.name), + evalparse(paste("k", parent.name, sep="_"))), +# evalparse(paste("k", parent.name, "sink", sep="_"))), + FOMC = FOMC.solution(outtimes_plot, + evalparse(parent.name), + evalparse("alpha"), evalparse("beta")), + DFOP = DFOP.solution(outtimes_plot, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("g")), + HS = HS.solution(outtimes_plot, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("tb")), + SFORB = SFORB.solution(outtimes_plot, + evalparse(parent.name), + evalparse(paste("k", parent.name, "bound", sep="_")), + evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), + evalparse(paste("k", parent.name, "sink", sep="_"))) + ) + out_plot <- cbind(outtimes_plot, o_plot) + dimnames(out_plot) <- list(outtimes_plot, c("time", sub("_free", "", parent.name))) + } + if(solution == "eigen") { + o_plot <- matrix(mapply(f.out, outtimes_plot), + nrow = length(mod_vars), ncol = length(outtimes_plot)) + dimnames(o_plot) <- list(mod_vars, outtimes_plot) + out_plot <- cbind(time = outtimes_plot, t(o_plot)) + } + if (solution == "deSolve") { + out_plot <- ode( + y = odeini, + times = outtimes_plot, + func = mkindiff, + parms = odeparms) + } + out_transformed_plot <- data.frame(time = out_plot[,"time"]) + for (var in names(mkinmod$map)) { + if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { + out_transformed_plot[var] <- out_plot[, var] + } else { + out_transformed_plot[var] <- rowSums(out_plot[, mkinmod$map[[var]]]) + } + } + out_transformed_plot <<- out_transformed_plot + + plot(0, type="n", + xlim = range(observed$time), ylim = range(observed$value, na.rm=TRUE), + xlab = "Time", ylab = "Observed") + col_obs <- pch_obs <- 1:length(obs_vars) + names(col_obs) <- names(pch_obs) <- obs_vars + for (obs_var in obs_vars) { + points(subset(observed, name == obs_var, c(time, value)), + pch = pch_obs[obs_var], col = col_obs[obs_var]) + } + matlines(out_transformed_plot$time, out_transformed_plot[-1]) + legend("topright", inset=c(0.05, 0.05), legend=obs_vars, + col=col_obs, pch=pch_obs, lty=1:length(pch_obs)) + } + + assign("cost.old", mC$model, inherits=TRUE) + } + # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost + mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res) + + return(mC) + } + fit <-modFit(cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, ...) + + # We need to return some more data for summary and plotting + fit$solution <- solution + if (solution == "eigen") { + fit$coefmat <- mkinmod$coefmat + } + if (solution == "deSolve") { + fit$mkindiff <- mkindiff + } + if (plot == TRUE) { + fit$out_transformed_plot = out_transformed_plot + } + + # We also need various other information for summary and plotting + fit$map <- mkinmod$map + fit$diffs <- mkinmod$diffs + + # mkin_long_to_wide does not handle ragged data +# fit$observed <- mkin_long_to_wide(observed) + fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") + names(fit$observed) <- c("time", as.vector(unique(observed$name))) + + predicted_long <- mkin_wide_to_long(out_predicted, time = "time") + fit$predicted <- out_predicted + + # Collect initial parameter values in two dataframes + fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) + fit$start$type = c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim))) + fit$start$lower <- lower + fit$start$upper <- upper + + fit$fixed <- data.frame( + value = c(state.ini.fixed, parms.fixed)) + fit$fixed$type = c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed))) + +# # Calculate chi2 error levels according to FOCUS (2006) + fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) + + # Calculate dissipation times DT50 and DT90 and formation fractions + parms.all = c(fit$par, parms.fixed) + fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), + row.names = obs_vars) + fit$ff <- vector() + fit$SFORB <- vector() + for (obs_var in obs_vars) { + type = names(mkinmod$map[[obs_var]])[1] + + fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all) + } + + fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed) + + # Collect observed, predicted and residuals + data<-observed + data$err<-rep(NA,length(data$time)) + data <- merge(data, predicted_long, by = c("time", "name")) + names(data)<-c("time", "variable", "observed","err-var", "predicted") + data$residual <- data$observed - data$predicted + data$variable <- ordered(data$variable, levels = obs_vars) + fit$data <- data[order(data$variable, data$time), ] + fit$atol <- atol + + class(fit) <- c("CakeFit", "mkinfit", "modFit") + return(fit) +} + diff --git a/CakeOlsPlot.R b/CakeOlsPlot.R new file mode 100644 index 0000000..199cc28 --- /dev/null +++ b/CakeOlsPlot.R @@ -0,0 +1,117 @@ +#$Id: CakeOlsPlot.R 216 2011-07-05 14:35:03Z nelr $ +# Generates fitted curves so the GUI can plot them +# Based on code in IRLSkinfit +# Author: Rob Nelson (Tessella) +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Tessella Project Reference: 6245 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +CakeOlsPlot <- function(fit, xlim = range(fit$data$time), ...) +{ + cat("<PLOT MODEL START>\n") + + solution = fit$solution + if ( is.null(solution) ) { + solution <- "deSolve" + } + atol = fit$atol + if ( is.null(atol) ) { + atol = 1.0e-6 + } + + fixed <- fit$fixed$value + names(fixed) <- rownames(fit$fixed) + parms.all <- c(fit$par, fixed) + ininames <- c( + rownames(subset(fit$start, type == "state")), + rownames(subset(fit$fixed, type == "state"))) + odeini <- parms.all[ininames] + names(odeini) <- names(fit$diffs) + + outtimes <- seq(0, xlim[2], length.out=101) + + odenames <- c( + rownames(subset(fit$start, type == "deparm")), + rownames(subset(fit$fixed, type == "deparm"))) + odeparms <- parms.all[odenames] + + # Solve the system + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } + if (solution == "analytical") { + parent.type = names(fit$map[[1]])[1] + parent.name = names(fit$diffs)[[1]] + o <- switch(parent.type, + SFO = SFO.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, sep="_"))), + FOMC = FOMC.solution(outtimes, + evalparse(parent.name), + evalparse("alpha"), evalparse("beta")), + DFOP = DFOP.solution(outtimes, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("g")), + HS = HS.solution(outtimes, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("tb")), + SFORB = SFORB.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, "free_bound", sep="_")), + evalparse(paste("k", parent.name, "bound_free", sep="_")), + evalparse(paste("k", parent.name, "free_sink", sep="_"))) + ) + out <- cbind(outtimes, o) + dimnames(out) <- list(outtimes, c("time", parent.name)) + } + if (solution == "eigen") { + coefmat.num <- matrix(sapply(as.vector(fit$coefmat), evalparse), + nrow = length(odeini)) + e <- eigen(coefmat.num) + c <- solve(e$vectors, odeini) + f.out <- function(t) { + e$vectors %*% diag(exp(e$values * t), nrow=length(odeini)) %*% c + } + o <- matrix(mapply(f.out, outtimes), + nrow = length(odeini), ncol = length(outtimes)) + dimnames(o) <- list(names(odeini), NULL) + out <- cbind(time = outtimes, t(o)) + } + if (solution == "deSolve") { + out <- ode( + y = odeini, + times = outtimes, + func = fit$mkindiff, + parms = odeparms, + atol = atol + ) + } + + # Output transformation for models with unobserved compartments like SFORB + out_transformed <- data.frame(time = out[,"time"]) + for (var in names(fit$map)) { + if(length(fit$map[[var]]) == 1) { + out_transformed[var] <- out[, var] + } else { + out_transformed[var] <- rowSums(out[, fit$map[[var]]]) + } + } + print(out_transformed) + + cat("<PLOT MODEL END>\n") +} diff --git a/CakePenalties.R b/CakePenalties.R new file mode 100644 index 0000000..3b2a0df --- /dev/null +++ b/CakePenalties.R @@ -0,0 +1,56 @@ +# $Id: CakePenalties.R 216 2011-07-05 14:35:03Z nelr $ +# The CAKE R modules are based on mkin +# CAKE (6245), by Tessella, for Syngenta: Copyright (C) 2011 Syngenta +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Penalty function for fits that should not be accepted +CakePenalties<-function(params, modelled, obs, penalty.scale = 10, ...){ + # Because the model cost is roughly proportional to points, + # make the penalties scale that way too for consistent behaviour + # with many or few data points + sum(CakePenaltiesLong(params, modelled, obs, penalty.scale, ...)$value) * dim(obs)[[1]] +} +CakePenaltiesLong<-function(params, modelled, obs, penalty.scale = 10, ...){ + r<-data.frame(name=character(0), value=numeric(0)) + + # Flow fractions > 1 + #------------------------------- + fails<-CakePenaltiesFF(params); + if(dim(fails)[[1]] > 0){ + #print("Penalty failures:") + #print(fails) + # Add a failure record for each flow fraction that missed + names(fails)<-c('name', 'value') + fails$value = 100*(fails$value - 1) + fails$name = lapply(fails$name, function(x) paste("FF_", x, sep="")) + r <- rbind(r, fails) + #totalPenalties <- totalPenalties + 100*sum(fails$x - 1) # Penalty for failure + } + + r$value <- r$value * penalty.scale + return(r) +} + +CakePenaltiesFF<-function(params){ + ff.values<-params["f_"==substr(names(params), 0, 2)] # Flow fractions + if(length(ff.values) > 0){ + ffs<-data.frame(t(sapply(strsplit(names(ff.values), '_'), FUN=function(x){ x[c(2,4)] }))) # Split into source/dest + names(ffs)<-c('source', 'dest') + ffs['value'] <- ff.values # Add values + sums<-aggregate(ffs$value, list(s=ffs$source), sum) # Sum of flows from each source + fails<-sums[sums$x>1.00001,] # All compartments > 1 + } else { fails<-data.frame(s=character(0), x=numeric(0)) } + return(fails) +}
\ No newline at end of file diff --git a/CakeSummary.R b/CakeSummary.R new file mode 100644 index 0000000..ec42e75 --- /dev/null +++ b/CakeSummary.R @@ -0,0 +1,301 @@ +# $Id: CakeSummary.R 216 2011-07-05 14:35:03Z nelr $ +# CakeSummary: Functions to calculate statistics used in the summary, +# and display the summary itself. + +# Parts modified from package mkin +# Parts Tessella (Rob Nelson/Richard Smith) for Syngenta: Copyright (C) 2011 Syngenta +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Calculate the decay times for a compartment +# Arguments +# type - type of compartment +# obs_var - compartment name +# parms.all - list of parameters +CakeDT<-function(type, obs_var, parms.all) { + if (type == "SFO") { + # Changed to new parameterisation + #k_names = grep(paste("k", obs_var, sep="_"), names(parms.all), value=TRUE) + k_name = paste("k", obs_var, sep="_") + k_tot = parms.all[k_name] + DT50 = log(2)/k_tot + DT90 = log(10)/k_tot + } + if (type == "FOMC") { + alpha = parms.all["alpha"] + beta = parms.all["beta"] + DT50 = beta * (2^(1/alpha) - 1) + DT90 = beta * (10^(1/alpha) - 1) + } + if (type == "DFOP") { + k1 = parms.all["k1"] + k2 = parms.all["k2"] + g = parms.all["g"] + f <- function(t, x) { + fraction <- g * exp( - k1 * t) + (1 - g) * exp( - k2 * t) + (fraction - (1 - x/100))^2 + } + DTmax <- 1000 + DT50.o <- optimize(f, c(0.001, DTmax), x=50)$minimum + DT50 = ifelse(DTmax - DT50.o < 0.1, NA, DT50.o) + DT90.o <- optimize(f, c(0.001, DTmax), x=90)$minimum + DT90 = ifelse(DTmax - DT90.o < 0.1, NA, DT90.o) + } + if (type == "HS") { + k1 = parms.all["k1"] + k2 = parms.all["k2"] + tb = parms.all["tb"] + DTx <- function(x) { + DTx.a <- (log(100/(100 - x)))/k1 + DTx.b <- tb + (log(100/(100 - x)) - k1 * tb)/k2 + if (DTx.a < tb) DTx <- DTx.a + else DTx <- DTx.b + return(DTx) + } + DT50 <- DTx(50) + DT90 <- DTx(90) + } + if (type == "SFORB") { + # FOCUS kinetics (2006), p. 60 f + k_out_names = grep(paste("k", obs_var, "free", sep="_"), names(parms.all), value=TRUE) + k_out_names = setdiff(k_out_names, paste("k", obs_var, "free", "bound", sep="_")) + k_1output = sum(parms.all[k_out_names]) + k_12 = parms.all[paste("k", obs_var, "free", "bound", sep="_")] + k_21 = parms.all[paste("k", obs_var, "bound", "free", sep="_")] + + sqrt_exp = sqrt(1/4 * (k_12 + k_21 + k_1output)^2 + k_12 * k_21 - (k_12 + k_1output) * k_21) + b1 = 0.5 * (k_12 + k_21 + k_1output) + sqrt_exp + b2 = 0.5 * (k_12 + k_21 + k_1output) - sqrt_exp + + SFORB_fraction = function(t) { + ((k_12 + k_21 - b1)/(b2 - b1)) * exp(-b1 * t) + + ((k_12 + k_21 - b2)/(b1 - b2)) * exp(-b2 * t) + } + f_50 <- function(t) (SFORB_fraction(t) - 0.5)^2 + max_DT <- 1000 + DT50.o <- optimize(f_50, c(0.01, max_DT))$minimum + if (abs(DT50.o - max_DT) < 0.01) DT50 = NA else DT50 = DT50.o + f_90 <- function(t) (SFORB_fraction(t) - 0.1)^2 + DT90.o <- optimize(f_90, c(0.01, 1000))$minimum + if (abs(DT90.o - max_DT) < 0.01) DT90 = NA else DT90 = DT90.o + } + ret<-c(DT50, DT90) + names(ret)<-c("DT50","DT90") + ret +} + +# Compute chi2 etc +# Arguments +# observed - data.frame of observed data +# predicted_long - fitted values etc +# obs_vars - compartment names +# parms.optim - list of fitted parameters +# state,ini.optim - list of fitted initial values +# +CakeChi2<-function(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) { + # Calculate chi2 error levels according to FOCUS (2006) + means <- aggregate(value ~ time + name, data = observed, mean, na.rm=TRUE) + errdata <- merge(means, predicted_long, by = c("time", "name"), suffixes = c("_mean", "_pred")) + errdata <- errdata[order(errdata$time, errdata$name), ] + errmin.overall <- mkinerrmin(errdata, length(parms.optim) + length(state.ini.optim)) + + errmin <- data.frame(err.min = errmin.overall$err.min, + n.optim = errmin.overall$n.optim, df = errmin.overall$df) + rownames(errmin) <- "All data" + for (obs_var in obs_vars) + { + errdata.var <- subset(errdata, name == obs_var) + n.k.optim <- length(grep(paste("k", obs_var, sep="_"), names(parms.optim))) + n.initials.optim <- length(grep(paste(obs_var, ".*", "_0", sep=""), names(state.ini.optim))) + n.optim <- n.k.optim + n.initials.optim + if ("alpha" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("beta" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("k1" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("k2" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("g" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("tb" %in% names(parms.optim)) n.optim <- n.optim + 1 + errmin.tmp <- mkinerrmin(errdata.var, n.optim) + errmin[obs_var, c("err.min", "n.optim", "df")] <- errmin.tmp + } + errmin +} + +# Compute additional statistics (rē/efficiency) of obs v pred for each compartment and the whole problem +CakeAdditionalStats<-function(obsvspred){ + agg<-aggregate(obsvspred[c('observed', 'err-var', 'predicted', 'residual')], list(name=obsvspred$variable), function(x){x}, simplify=FALSE) + frames<-apply(agg, 1, as.data.frame); + names(frames)<-agg$name; + frames$all<-obsvspred + + t(sapply(frames, function(frame){ + # This function takes a data frame for a single variable and makes the stats for it + # rē: FOCUS eq 6.5 p. 97 + # Efficiency: eq 6.6 p. 99 + do <- frame$observed - mean(frame$observed) + dp <- frame$predicted - mean(frame$predicted) + r2 <- ( sum(do * dp) / sqrt(sum(do^2) * sum(dp^2)) ) ^ 2 + eff <- 1 - ( sum((frame$predicted - frame$observed)^2) / sum(do^2) ) + list(r2=r2, eff=eff) + })) +} + +# Summarise a fit - used for CakeIrlsFit and CakeOlsFit +summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, ff = TRUE, cov = FALSE,...) { + param <- object$par + pnames <- names(param) + p <- length(param) + covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance + + rdf <- object$df.residual + resvar <- object$ssr / rdf + modVariance <- object$ssr / length(object$residuals) + + if (!is.numeric(covar)) { +# message <- "Cannot estimate covariance; system is singular" +# warning(message) +# covar <- matrix(data = NA, nrow = p, ncol = p) + covar=NULL + } else { + message <- "ok" + rownames(covar) <- colnames(covar) <-pnames + se <- sqrt(diag(covar) * resvar) + names(se) <- pnames + tval <- param / se + } + + if (!all(object$start$lower >=0)) { + message <- "Note that the one-sided t-test may not be appropriate if + parameter values below zero are possible." + warning(message) + } else message <- "ok" + + if(cov && !is.null(covar)) { + alpha <- 0.05 + lci <- param + qt(alpha/2,rdf)*se + uci <- param + qt(1-alpha/2,rdf)*se + param <- cbind(param, se, tval, pt(tval, rdf, lower.tail = FALSE), lci, uci) + dimnames(param) <- list(pnames, c("Estimate", "Std. Error", + "t value", "Pr(>t)", "Lower CI", "Upper CI")) + ans <- list(residuals = object$residuals, + residualVariance = resvar, + sigma = sqrt(resvar), + modVariance = modVariance, + df = c(p, rdf), cov.unscaled = covar, + cov.scaled = covar * resvar, + info = object$info, niter = object$iterations, + stopmess = message, + par = param) + } else { + param <- cbind(param) + ans <- list(residuals = object$residuals, + residualVariance = resvar, + sigma = sqrt(resvar), + modVariance = modVariance, + df = c(p, rdf), + info = object$info, niter = object$iterations, + stopmess = message, + par = param) + } + + ans$diffs <- object$diffs + if(data) { + ans$data <- object$data + ans$additionalstats = CakeAdditionalStats(object$data) + } + ans$start <- object$start + ans$fixed <- object$fixed + ans$errmin <- object$errmin + ans$penalties <- object$penalties + if(distimes) ans$distimes <- object$distimes + if(ff) ans$ff <- object$ff + if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB + class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit") + return(ans) +} + +# Print a summary. Used for CakeIrlsFit, CakeOlsFit and CakeMcmcFit +# Expanded from print.summary.modFit +print.summary.CakeFit <- function(x, digits = max(3, getOption("digits") - 3), ...) { + cat("\nEquations:\n") + print(noquote(as.character(x[["diffs"]]))) + df <- x$df + rdf <- df[2] + + cat("\nStarting values for optimised parameters:\n") + print(x$start) + + cat("\nFixed parameter values:\n") + if(length(x$fixed$value) == 0) cat("None\n") + else print(x$fixed) + + if ( !is.null(x$par) ) { + cat("\nOptimised parameters:\n") + printCoefmat(x$par, digits = digits, ...) + } + + cat("\nResidual standard error:", + format(signif(x$sigma, digits)), "on", rdf, "degrees of freedom\n") + + cat("\nChi2 error levels in percent:\n") + x$errmin$err.min <- 100 * x$errmin$err.min + print(x$errmin, digits=digits,...) + + printdistimes <- !is.null(x$distimes) + if(printdistimes){ + cat("\nEstimated disappearance times:\n") + print(x$distimes, digits=digits,...) + } + + printff <- !is.null(x$ff) + if(printff){ + cat("\nEstimated formation fractions:\n") + print(data.frame(ff = x$ff), digits=digits,...) + } + + printSFORB <- !is.null(x$SFORB) + if(printSFORB){ + cat("\nEstimated Eigenvalues of SFORB model(s):\n") + print(x$SFORB, digits=digits,...) + } + + printcor <- !is.null(x$cov.unscaled) + if (printcor){ + Corr <- cov2cor(x$cov.unscaled) + rownames(Corr) <- colnames(Corr) <- rownames(x$par) + cat("\nParameter correlation:\n") + print(Corr, digits = digits, ...) + } + + printdata <- !is.null(x$data) + if (printdata){ + cat("\nData:\n") + print(format(x$data, digits = digits, scientific = FALSE,...), row.names = FALSE) + } + + if(!is.null(x$additionalstats)){ + cat("\nAdditional Stats:\n"); + print(x$additionalstats, digits=digits, ...) + } + + if(!(is.null(x$penalties) || 0 == dim(x$penalties)[[1]])){ + cat("\nPenalties:\n"); + print(x$penalties, digits=digits, row.names = FALSE, ...) + } + + if ( !is.null(x$seed) ) { + cat("\nRandom Seed:", x$seed, "\n") + } + invisible(x) +} |