From be6d42ef636e8e1c9fdcfa6f8738ee10e885d75b Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 18 Oct 2017 10:17:59 +0200 Subject: Version 1.4 --- CakeCost.R | 348 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ CakeInit.R | 39 +++++++ CakeIrlsFit.R | 147 ++++++++++++++++++++++++ CakeIrlsPlot.R | 25 ++++ CakeMcmcFit.R | 292 +++++++++++++++++++++++++++++++++++++++++++++++ CakeModel.R | 261 ++++++++++++++++++++++++++++++++++++++++++ CakeNullPlot.R | 127 +++++++++++++++++++++ CakeOlsFit.R | 326 ++++++++++++++++++++++++++++++++++++++++++++++++++++ CakeOlsPlot.R | 117 +++++++++++++++++++ CakePenalties.R | 56 +++++++++ CakeSummary.R | 301 ++++++++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 2039 insertions(+) create mode 100644 CakeCost.R create mode 100644 CakeInit.R create mode 100644 CakeIrlsFit.R create mode 100644 CakeIrlsPlot.R create mode 100644 CakeMcmcFit.R create mode 100644 CakeModel.R create mode 100644 CakeNullPlot.R create mode 100644 CakeOlsFit.R create mode 100644 CakeOlsPlot.R create mode 100644 CakePenalties.R create mode 100644 CakeSummary.R diff --git a/CakeCost.R b/CakeCost.R new file mode 100644 index 0000000..1cc3981 --- /dev/null +++ b/CakeCost.R @@ -0,0 +1,348 @@ +## ----------------------------------------------------------------------------- +## The model cost and residuals +## ----------------------------------------------------------------------------- +# The CAKE R modules are based on mkin, +# Call to approx is only performed if there are multiple non NA values +# which should prevent most of the crashes - Rob Nelson (Tessella) +# +# Modifications developed by Tessella Plc for Syngenta, Copyright (C) 2011 Syngenta +# Authors: Rob Nelson, Richard Smith +# Tessella Project Reference: 6245 +# +# This program is free software: you can +# redistribute them and/or modify them under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see # +# +# $Id: CakeCost.R 216 2011-07-05 14:35:03Z nelr $ + +CakeCost <- function (model, obs, x = "time", y = NULL, err = NULL, + weight = "none", scaleVar = FALSE, cost = NULL, ...) { + + ## convert vector to matrix + if (is.vector(obs)) { + cn <- names(obs) + obs <- matrix(data = obs, nrow = 1) + colnames(obs) <- cn + } + if (is.vector(model)) { + cn <- names(model) + model <- matrix(data=model, nrow = 1) + colnames(model) <- cn + } + + ##============================================================================= + ## Observations + ##============================================================================= + + ## The position of independent variable(s) + ix <- 0 + if (! is.null(x)) { # mapping required... + ## For now multiple independent variables are not supported... + if (length(x) > 1) + stop ("multiple independent variables in 'obs' are not yet supported") + + if (! is.character(x)) + stop ("'x' should be the *name* of the column with the independent variable in 'obs' or NULL") + ix <- which(colnames(obs) %in% x) + if (length(ix) != length(x)) + stop(paste("Independent variable column not found in observations", x)) + } else ix <- NULL + + ## The position of weighing values + ierr <- 0 + if (! is.null(err)) { + if (! is.character(err)) + stop ("'err' should be the *name* of the column with the error estimates in obs or NULL") + ierr <- which(colnames(obs) == err) # only one + if (length(ierr) == 0) + stop(paste("Column with error estimates not found in observations", err)) + } + + ## The dependent variables + type <- 1 # data input type: type 2 is table format, type 1 is long format... + + if (!is.null(y)) { # it is in table format; first column are names of observed data... + + Names <- as.character(unique(obs[, 1])) # Names of data sets, all data should be model variables... + Ndat <- length(Names) # Number of data sets + ilist <- 1:Ndat + if (! is.character(y)) + stop ("'y' should be the *name* of the column with the values of the dependent variable in obs") + iy <- which(colnames(obs) == y) + if (length(iy) == 0) + stop(paste("Column with value of dependent variable not found in observations", y)) + type <- 2 + + } else { # it is a matrix, variable names are column names + Ndat <- NCOL(obs)-1 + Names <- colnames(obs) + ilist <- (1:NCOL(obs)) # column positions of the (dependent) observed variables + exclude <- ix # exclude columns that are not + if (ierr > 0) + exclude <- c(ix, ierr) # exclude columns that are not + if (length(exclude) > 0) + ilist <- ilist[-exclude] + } + + #================================ + # The model results + #================================ + + ModNames <- colnames(model) # Names of model variables + if (length(ix) > 1) { + ixMod <- NULL + + for ( i in 1:length(ix)) { + ix2 <- which(colnames(model) == x[i]) + if (length(ix2) == 0) + stop(paste("Cannot calculate cost: independent variable not found in model output", x[i])) + ixMod <- c(ixMod, ix2) + } + + xMod <- model[,ixMod] # Independent variable, model + } else if (length(ix) == 1) { + ixMod <- which(colnames(model) == x) + if (length(ixMod) == 0) + stop(paste("Cannot calculate cost: independent variable not found in model output", x)) + xMod <- model[,ixMod] # Independent variable, model + } + Residual <- NULL + CostVar <- NULL + + #================================ + # Compare model and data... + #================================ + xDat <- 0 + iDat <- 1:nrow(obs) + + for (i in ilist) { # for each observed variable ... + ii <- which(ModNames == Names[i]) + if (length(ii) == 0) stop(paste("observed variable not found in model output", Names[i])) + yMod <- model[,ii] + if (type == 2) { # table format + iDat <- which (obs[,1] == Names[i]) + if (length(ix) > 0) xDat <- obs[iDat, ix] + obsdat <- obs[iDat, iy] + } else { + if (length(ix) > 0) xDat <- obs[, 1] + obsdat <- obs[,i] + } + ii <- which(is.na(obsdat)) + if (length(ii) > 0) { + xDat <- xDat[-ii] + obsdat <- obsdat[-ii] + } + + # CAKE version - Added tests for multiple non-NA values + if (length(ix) > 0 && length(unique(xMod[!is.na(xMod)]))>1 && length(yMod[!is.na(yMod)])>1) + { + ModVar <- approx(xMod, yMod, xout = xDat)$y + } + else { + cat("CakeCost Warning: Only one valid point - using mean (yMod was", yMod, ")\n") + ModVar <- mean(yMod[!is.na(yMod)]) + obsdat <- mean(obsdat) + } + iex <- which(!is.na(ModVar)) + ModVar <- ModVar[iex] + obsdat <- obsdat[iex] + xDat <- xDat[iex] + if (ierr > 0) { + Err <- obs[iDat, ierr] + Err <- Err[iex] + } else { + if (weight == "std") + Err <- sd(obsdat) + else if (weight == "mean") + Err <- mean(abs(obsdat)) + else if (weight == "none") + Err <- 1 + else + stop("error: do not recognize 'weight'; should be one of 'none', 'std', 'mean'") + } + if (any(is.na(Err))) + stop(paste("error: cannot estimate weighing for observed variable: ", Names[i])) + if (min(Err) <= 0) + stop(paste("error: weighing for observed variable is 0 or negative:", Names[i])) + if (scaleVar) + Scale <- 1/length(obsdat) + else Scale <- 1 + Res <- (ModVar- obsdat) + res <- Res/Err + resScaled <- res*Scale + Residual <- rbind(Residual, + data.frame( + name = Names[i], + x = xDat, + obs = obsdat, + mod = ModVar, + weight = 1/Err, + res.unweighted = Res, + res = res)) + + CostVar <- rbind(CostVar, + data.frame( + name = Names[i], + scale = Scale, + N = length(Res), + SSR.unweighted = sum(Res^2), + SSR.unscaled = sum(res^2), + SSR = sum(resScaled^2))) + } # end loop over all observed variables + + ## SSR + Cost <- sum(CostVar$SSR * CostVar$scale) + + Lprob <- -sum(log(pmax(0, dnorm(Residual$mod, Residual$obs, Err)))) # avoid log of negative values + #Lprob <- -sum(log(pmax(.Machine$double.xmin, dnorm(Residual$mod, Residual$obs, Err)))) #avoid log(0) + + if (! is.null(cost)) { + Cost <- Cost + cost$model + CostVar <- rbind(CostVar, cost$var) + Residual <- rbind(Residual, cost$residuals) + Lprob <- Lprob + cost$minlogp + } + out <- list(model = Cost, cost = Cost, minlogp = Lprob, var = CostVar, residuals = Residual) + class(out) <- "modCost" + return(out) +} + +## ----------------------------------------------------------------------------- +## S3 methods of modCost +## ----------------------------------------------------------------------------- + +plot.modCost<- function(x, legpos="topleft", ...) { + nvar <- nrow(x$var) + + dots <- list(...) + + dots$xlab <- if(is.null(dots$xlab)) "x" else dots$xlab + dots$ylab <- if(is.null(dots$ylab)) "weighted residuals" else dots$ylab + DotsPch <- if(is.null(dots$pch)) (16:24) else dots$pch + dots$pch <- if(is.null(dots$pch)) (16:24)[x$residuals$name] else dots$pch[x$residuals$name] + DotsCol <- if(is.null(dots$col)) (1:nvar) else dots$col + dots$col <- if(is.null(dots$col)) (1:nvar)[x$residuals$name] else dots$col[x$residuals$name] + + do.call("plot", c(alist(x$residuals$x, x$residuals$res), dots)) + +# plot(x$residuals$x, x$residuals$res, xlab="x", ylab="weighted residuals", +# pch=c(16:24)[x$residuals$name],col=c(1:nvar)[x$residuals$name],...) + + if (! is.na(legpos)) + legend(legpos, legend = x$var$name, col = DotsCol, pch = DotsPch) +} + +## ----------------------------------------------------------------------------- +## Internal cost function for optimisers +## ----------------------------------------------------------------------------- +# Cost function. The returned structure must have $model +# Called from the middle of IrlsFit and OlsFit +# We need to preserve state between calls so make a closure +CakeInternalCostFunctions <- function(mkinmod, state.ini.optim, state.ini.optim.boxnames, + state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, + quiet, plot=FALSE, atol=1e-6){ + cost.old <- 1e+100 + calls <- 0 + out_predicted <- NA + + get.predicted <- function(){ out_predicted } + + get.best.cost <- function(){ cost.old } + reset.best.cost <- function() { cost.old<<-1e+100 } + + get.calls <- function(){ calls } + set.calls <- function(newcalls){ calls <<- newcalls } + + set.error<-function(err) { observed$err <<- err } + + cost <- function(P) { + assign("calls", calls + 1, inherits = TRUE) + print(P) + if (length(state.ini.optim) > 0) { + odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) + names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) + } + else odeini <- state.ini.fixed + odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], + parms.fixed) + # Ensure initial state is at time 0 + outtimes = unique(c(0,observed$time)) + out <- ode(y = odeini, times = outtimes, func = mkindiff, + parms = odeparms, atol=atol) + out_transformed <- data.frame(time = out[, "time"]) + for (var in names(mkinmod$map)) { + if (length(mkinmod$map[[var]]) == 1) { + out_transformed[var] <- out[, var] + } + else { + out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) + } + } + assign("out_predicted", out_transformed, inherits = TRUE) + mC <- CakeCost(out_transformed, observed, y = "value", + err = 'err', scaleVar = scaleVar) + mC$penalties <- CakePenalties(odeparms, out_transformed, observed) + mC$model <- mC$cost + mC$penalties; + if (mC$model < cost.old) { + if (!quiet) + cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, + "\n") + if (plot) { + outtimes_plot = seq(min(observed$time), max(observed$time), + length.out = 100) + out_plot <- ode(y = odeini, times = outtimes_plot, + func = mkindiff, parms = odeparms, atol=atol) + out_transformed_plot <- data.frame(time = out_plot[, + "time"]) + for (var in names(mkinmod$map)) { + if (length(mkinmod$map[[var]]) == 1) { + out_transformed_plot[var] <- out_plot[, var] + } + else { + out_transformed_plot[var] <- rowSums(out_plot[, + mkinmod$map[[var]]]) + } + } + plot(0, type = "n", xlim = range(observed$time), + ylim = range(observed$value, na.rm = TRUE), + xlab = "Time", ylab = "Observed") + col_obs <- pch_obs <- 1:length(obs_vars) + names(col_obs) <- names(pch_obs) <- obs_vars + for (obs_var in obs_vars) { + points(subset(observed, name == obs_var, c(time, + value)), pch = pch_obs[obs_var], col = col_obs[obs_var]) + } + matlines(out_transformed_plot$time, out_transformed_plot[-1]) + legend("topright", inset = c(0.05, 0.05), legend = obs_vars, + col = col_obs, pch = pch_obs, lty = 1:length(pch_obs)) + } + assign("cost.old", mC$model, inherits = TRUE) + } + # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost + if(mC$penalties > 0){ + #cat("Penalty adjustment: ",mC$penalties) + mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res) + } + + return(mC) + } + + + + list(cost=cost, + get.predicted=get.predicted, + get.calls=get.calls, set.calls=set.calls, + get.best.cost=get.best.cost, reset.best.cost=reset.best.cost, + set.error=set.error + ) +} \ No newline at end of file diff --git a/CakeInit.R b/CakeInit.R new file mode 100644 index 0000000..3e609d5 --- /dev/null +++ b/CakeInit.R @@ -0,0 +1,39 @@ +#$Id: CakeInit.R 221 2011-11-01 14:12:45Z nelr $ + +# Purpose: Load the modules required by CAKE +# Call with chdir=TRUE so it can load our source from the current directory +# Tessella Project Reference: 6245 + +# Copyright (C) 2011 Syngenta + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .” + +library(mkin) +source("CakeModel.R") +source("CakeOlsFit.R") +source("CakeIrlsFit.R") +source("CakeOlsPlot.R") +source("CakeIrlsPlot.R") +source("CakeNullPlot.R") +source("CakeCost.R") +source("CakePenalties.R") +source("CakeMcmcFit.R") +source("CakeSummary.R") + +options(width=1000) +options(error=recover) +options(warn=-1) + +# When running from C#, this must match the C# CAKE version +CAKE.version<-"1.4" diff --git a/CakeIrlsFit.R b/CakeIrlsFit.R new file mode 100644 index 0000000..bf20572 --- /dev/null +++ b/CakeIrlsFit.R @@ -0,0 +1,147 @@ +#$Id: CakeIrlsFit.R 216 2011-07-05 14:35:03Z nelr $ +# +# The CAKE R modules are based on mkin +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Authors: Rob Nelson, Richard Smith +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .” +# +CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)), + state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0, + upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1], + plot = FALSE, quiet = FALSE, err = NULL, weight = "none", + scaleVar = FALSE, atol=1e-6, control=list(),...) +{ + +### This is a modification based on the "mkinfit" function. +### version 0.1 July 20 +### +# This version has been modified to expect SFO parameterised as k and flow fractions +# Based on code in IRLSkinfit + NAind <-which(is.na(observed$value)) + mod_vars <- names(mkinmod$diffs) + observed <- subset(observed, name %in% names(mkinmod$map)) + ERR <- rep(1,nrow(observed)) + observed <- cbind(observed,err=ERR) + obs_vars = unique(as.character(observed$name)) + if (is.null(names(parms.ini))) + names(parms.ini) <- mkinmod$parms + mkindiff <- function(t, state, parms) { + time <- t + diffs <- vector() + for (box in mod_vars) { + diffname <- paste("d", box, sep = "_") + diffs[diffname] <- with(as.list(c(time, state, parms)), + eval(parse(text = mkinmod$diffs[[box]]))) + } + return(list(c(diffs))) + } + if (is.null(names(state.ini))) + names(state.ini) <- mod_vars + parms.fixed <- parms.ini[fixed_parms] + optim_parms <- setdiff(names(parms.ini), fixed_parms) + parms.optim <- parms.ini[optim_parms] + state.ini.fixed <- state.ini[fixed_initials] + optim_initials <- setdiff(names(state.ini), fixed_initials) + state.ini.optim <- state.ini[optim_initials] + state.ini.optim.boxnames <- names(state.ini.optim) + if (length(state.ini.optim) > 0) { + names(state.ini.optim) <- paste(names(state.ini.optim), + "0", sep = "_") + } + + costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames, + state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol) + + ############### Iteratively Reweighted Least Squares############# + ## Start with no weighting + fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, + upper = upper,control=control,...) + + if(length(control)==0) + { + irls.control <- list(maxIter=10,tol=1e-05) + control <- list(irls.control=irls.control) + }else{ + if(is.null(control$irls.control)) + { + irls.control <- list(maxIter=10,tol=1e-05) + control <- list(irls.control=irls.control) + } + } + irls.control <- control$irls.control + maxIter <- irls.control$maxIter + tol <- irls.control$tol + #### + niter <- 1 + ## insure one IRLS iteration + diffsigma <- 100 + olderr <- rep(1,length(mod_vars)) + while(diffsigma>tol & niter<=maxIter) + { + err <- sqrt(fit$var_ms_unweighted) + ERR <- err[as.character(observed$name)] + costFunctions$set.error(ERR) + diffsigma <- sum((err-olderr)^2) + cat("IRLS iteration at",niter, "; Diff in error variance ", diffsigma,"\n") + olderr <- err + fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, control=control, ...) + niter <- niter+1 + + ### If not converged, reweight and fit + } + + ########################################### + fit$mkindiff <- mkindiff + fit$map <- mkinmod$map + fit$diffs <- mkinmod$diffs + + out_predicted <- costFunctions$get.predicted() + + # mkin_long_to_wide does not handle ragged data + fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") + names(fit$observed) <- c("time", as.vector(unique(observed$name))) + + predicted_long <- mkin_wide_to_long(out_predicted, time = "time") + fit$predicted <- out_predicted + fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) + fit$start$type = c(rep("state", length(state.ini.optim)), + rep("deparm", length(parms.optim))) + fit$start$lower <- lower + fit$start$upper <- upper + fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) + fit$fixed$type = c(rep("state", length(state.ini.fixed)), + rep("deparm", length(parms.fixed))) + + fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) + parms.all = c(fit$par, parms.fixed) + fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed) + fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), + DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) + for (obs_var in obs_vars) { + type = names(mkinmod$map[[obs_var]])[1] + fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all) + } + + data <- merge(observed, predicted_long, by = c("time", "name")) + + + names(data) <- c("time", "variable", "observed","err-var", "predicted") + data$residual <- data$observed - data$predicted + data$variable <- ordered(data$variable, levels = obs_vars) + fit$data <- data[order(data$variable, data$time), ] + class(fit) <- c("CakeFit", "mkinfit", "modFit") + return(fit) +} diff --git a/CakeIrlsPlot.R b/CakeIrlsPlot.R new file mode 100644 index 0000000..2de8910 --- /dev/null +++ b/CakeIrlsPlot.R @@ -0,0 +1,25 @@ +#$Id: CakeIrlsPlot.R 216 2011-07-05 14:35:03Z nelr $ +# Generates fitted curves so the GUI can plot them +# Author: Rob Nelson (Tessella) +# Tessella Project Reference: 6245 + +# Copyright (C) 2011 Syngenta +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .” + +CakeIrlsPlot <- function(fit, xlim = range(fit$data$time), ...) +{ + CakeOlsPlot(fit, xlim, ...) +} + diff --git a/CakeMcmcFit.R b/CakeMcmcFit.R new file mode 100644 index 0000000..209f904 --- /dev/null +++ b/CakeMcmcFit.R @@ -0,0 +1,292 @@ +# $Id: CakeMcmcFit.R 216 2011-07-05 14:35:03Z nelr $ +# The CAKE R modules are based on mkin, +# Based on mcmckinfit as modified by Bayer +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Author: Rob Nelson +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .” + +CakeMcmcFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)), + state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0, + upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1], + plot = FALSE, quiet = FALSE, err = NULL, weight = "none", + scaleVar = FALSE, commonsigma=FALSE,jump = NULL,prior = NULL, + wvar0=0.1,niter = 1000, + outputlength = niter, burninlength = 0, updatecov = niter, + ntrydr = 1, drscale = NULL, verbose = TRUE,fitstart=TRUE,seed=NULL,atol=1e-6,...) +{ + + NAind <-which(is.na(observed$value)) + mod_vars <- names(mkinmod$diffs) + observed <- subset(observed, name %in% names(mkinmod$map)) + ERR <- rep(1,nrow(observed)) + observed <- cbind(observed,err=ERR) + obs_vars = unique(as.character(observed$name)) + if (is.null(names(parms.ini))) + names(parms.ini) <- mkinmod$parms + mkindiff <- function(t, state, parms) { + time <- t + diffs <- vector() + for (box in mod_vars) { + diffname <- paste("d", box, sep = "_") + diffs[diffname] <- with(as.list(c(time, state, parms)), + eval(parse(text = mkinmod$diffs[[box]]))) + } + return(list(c(diffs))) + } + if (is.null(names(state.ini))) + names(state.ini) <- mod_vars + parms.fixed <- parms.ini[fixed_parms] + optim_parms <- setdiff(names(parms.ini), fixed_parms) + parms.optim <- parms.ini[optim_parms] + state.ini.fixed <- state.ini[fixed_initials] + optim_initials <- setdiff(names(state.ini), fixed_initials) + state.ini.optim <- state.ini[optim_initials] + state.ini.optim.boxnames <- names(state.ini.optim) + if (length(state.ini.optim) > 0) { + names(state.ini.optim) <- paste(names(state.ini.optim), + "0", sep = "_") + } + + costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames, + state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol) + bestIteration <- -1; + costWithStatus <- function(P, ...){ + r <- costFunctions$cost(P) + if(r$cost == costFunctions$get.best.cost()) { + bestIteration<<-costFunctions$get.calls(); + cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n') + } + cat("MCMC Call no.", costFunctions$get.calls(), '\n') + return(r) + } + + # Set the seed + if ( is.null(seed) ) { + # No seed so create a random one so there is something to report + seed = runif(1,0,2^31-1) + } + seed = as.integer(seed) + set.seed(seed) + + if(fitstart==TRUE) + { + ## ############# Get Initial Paramtervalues ############# + ## Start with no weighting + fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, + upper = upper,...) + + if(commonsigma==TRUE){ + #observed$err <- rep(1,nrow(observed)) + cov0 <- summary(fit)$cov.scaled*2.4^2/length(fit$par) + costFunctions$set.calls(0); costFunctions$reset.best.cost() + res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) + #res <- modMCMC(cost,fit$par,lower=lower,upper=upper,niter=niter) + }else{ + ## ############## One reweighted estimation ############################ + ## Estimate the error variance(sd) + tmpres <- fit$residuals + oldERR <- observed$err + err <- rep(NA,length(mod_vars)) + for(i in 1:length(mod_vars)) + { + box <- mod_vars[i] + ind <- which(names(tmpres)==box) + tmp <- tmpres[ind] + err[i] <- sd(tmp) + } + names(err) <- mod_vars + ERR <- err[as.character(observed$name)] + observed$err <-ERR + costFunctions$set.error(ERR) + olderr <- rep(1,length(mod_vars)) + diffsigma <- sum((err-olderr)^2) + ## At least do one iteration step to get a weighted LS + fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...) + ## Use this as the Input for MCMC algorithm + ## ########################## + fs <- summary(fit) + cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par) + var0 <- fit$var_ms_unweighted + costFunctions$set.calls(0); costFunctions$reset.best.cost() + res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) + + } + }else { + stop('fitstart=FALSE code removed until needed') + } + # Construct the fit object to return + fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) + fit$start$type = c(rep("state", length(state.ini.optim)), + rep("deparm", length(parms.optim))) + fit$start$lower <- lower + fit$start$upper <- upper + fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) + fit$fixed$type = c(rep("state", length(state.ini.fixed)), + rep("deparm", length(parms.fixed))) + + fit$mkindiff <- mkindiff + fit$map <- mkinmod$map + fit$diffs <- mkinmod$diffs + + # mkin_long_to_wide does not handle ragged data + fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") + #names(fit$observed) <- c("time", as.vector(unique(observed$name))) + fns <- function(str) { + fields <- strsplit(str, "\\.")[[1]] + if (fields[1] == "value") { + return(fields[2]) + } + else { + return(str) + } + } + names(fit$observed) <- sapply(names(fit$observed), fns) + + # Replace mean from modFit with mean from modMCMC + fnm <- function(x) mean(res$pars[,x]) + fit$par <- sapply(dimnames(res$pars)[[2]],fnm) + fit$bestpar <- res$bestpar + fit$costfn <- costWithStatus + + # Disappearence times + parms.all <- c(fit$par, parms.fixed) + obs_vars = unique(as.character(observed$name)) + fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), + DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) + for (obs_var in obs_vars) { + type = names(mkinmod$map[[obs_var]])[1] + fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all) + } + + + # Fits to data + odeini <- state.ini + odeparms <- parms.all + # If this step modelled the start amount, include it. Lookup in vectors returns NA not null if missing + if(!is.na(odeparms["Parent_0"])) { odeini['Parent'] = odeparms["Parent_0"] } + + # Solve the system + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } + + # Ensure initial state is at time 0 + obstimes <- unique(c(0,observed$time)) + + out <- ode( + y = odeini, + times = obstimes, + func = mkindiff, + parms = odeparms, + ) + + # Output transformation for models with unobserved compartments like SFORB + out_transformed <- data.frame(time = out[,"time"]) + for (var in names(mkinmod$map)) { + if(length(mkinmod$map[[var]]) == 1) { + out_transformed[var] <- out[, var] + } else { + out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) + } + } + fit$predicted <- out_transformed + fit$penalties <- CakePenaltiesLong(odeparms, out_transformed, observed) + + predicted_long <- mkin_wide_to_long(out_transformed,time="time") + obs_vars = unique(as.character(observed$name)) + fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) + + data<-observed + data$err<-rep(NA,length(data$time)) + data<-merge(data, predicted_long, by=c("time","name")) + names(data)<-c("time", "variable", "observed","err-var", "predicted") + data$residual<-data$observed-data$predicted + data$variable <- ordered(data$variable, levels = obs_vars) + fit$data <- data[order(data$variable, data$time), ] + + sq <- data$residual * data$residual + fit$ssr <- sum(sq) + + fit$seed = seed + + fit$res <- res + class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit") + return(fit) +} + + +# Summarise a fit +summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, ff = TRUE, cov = FALSE,...) { + param <- object$par + pnames <- names(param) + p <- length(param) + #covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance + mcmc <- object$res + covar <- cov(mcmc$pars) + + rdf <- object$df.residual + + message <- "ok" + rownames(covar) <- colnames(covar) <-pnames + + #se <- sqrt(diag(covar) * resvar) + fnse <- function(x) sd(mcmc$pars[,x]) #/sqrt(length(mcmc$pars[,x])) + se <- sapply(dimnames(mcmc$pars)[[2]],fnse) + + tval <- param / se + + if (!all(object$start$lower >=0)) { + message <- "Note that the one-sided t-test may not be appropriate if + parameter values below zero are possible." + warning(message) + } else message <- "ok" + + # Parameter fits + alpha <- 0.05 + lci <- param + qt(alpha/2,rdf)*se + uci <- param + qt(1-alpha/2,rdf)*se + param <- cbind(param, se, tval, pt(tval, rdf, lower.tail = FALSE), lci, uci) + dimnames(param) <- list(pnames, c("Estimate", "Std. Error", "t value", "Pr(>t)", "Lower CI", "Upper CI")) + + # Residuals from mean of MCMC fit + resvar <- object$ssr/ rdf + modVariance <- object$ssr / length(object$data$residual) + + ans <- list(residuals = object$data$residuals, + residualVariance = resvar, + sigma = sqrt(resvar), + modVariance = modVariance, + df = c(p, rdf), cov.unscaled = covar, + cov.scaled = covar * resvar, + info = object$info, niter = object$iterations, + stopmess = message, + par = param) + + ans$diffs <- object$diffs + ans$data <- object$data + ans$additionalstats = CakeAdditionalStats(object$data) + ans$seed <- object$seed + + ans$start <- object$start + ans$fixed <- object$fixed + ans$errmin <- object$errmin + if(distimes) ans$distimes <- object$distimes + if(ff) ans$ff <- object$ff + if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB + class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit") + return(ans) +} diff --git a/CakeModel.R b/CakeModel.R new file mode 100644 index 0000000..a93d7af --- /dev/null +++ b/CakeModel.R @@ -0,0 +1,261 @@ +# Was Id: mkinmod.R 71 2010-09-12 01:13:36Z jranke +# $Id: CakeModel.R 216 2011-07-05 14:35:03Z nelr $ + +# The CAKE R modules are based on mkin +# Portions Johannes Ranke, 2010 +# Contact: mkin-devel@lists.berlios.de + +# This version has been modified to parameterise SFO as k and flow fractions +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .” + +CakeModel <- function(...) +{ + spec <- list(...) + obs_vars <- names(spec) + + # The returned model will be a list of character vectors, containing + # differential equations, parameter names and a mapping from model variables + # to observed variables. If possible, a matrix representation of the + # differential equations is included + parms <- vector() + diffs <- vector() + map <- list() + if(spec[[1]]$type %in% c("FOMC", "DFOP", "HS")) { + mat = FALSE + if(!is.null(spec[[1]]$to)) { + message <- paste("Only constant formation fractions over time are implemented.", + "Depending on the reason for the time dependence of degradation, this may be unrealistic", + sep="\n") + warning(message) + } else message <- "ok" + } else mat = TRUE + + mat = FALSE # XYZZY Not implemented yet assuming it should be + nlt = list() + + # Establish list of differential equations + for (varname in obs_vars) + { + if(is.null(spec[[varname]]$type)) stop( + "Every argument to mkinmod must be a list containing a type component") + if(!spec[[varname]]$type %in% c("SFO", "FOMC", "DFOP", "HS", "SFORB")) stop( + "Available types are SFO, FOMC, DFOP, HS and SFORB only") + new_parms <- vector() + + # New (sub)compartments (boxes) needed for the model type + new_boxes <- switch(spec[[varname]]$type, + SFO = varname, + FOMC = varname, + DFOP = varname, + HS = varname, + SFORB = paste(varname, c("free", "bound"), sep="_") + ) + map[[varname]] <- new_boxes + names(map[[varname]]) <- rep(spec[[varname]]$type, length(new_boxes)) + + # Start a new differential equation for each new box + new_diffs <- paste("d_", new_boxes, " =", sep="") + + # Turn on sink if not specified otherwise + if(is.null(spec[[varname]]$sink)) spec[[varname]]$sink <- TRUE + + #@@@ADD SFO k HERE !!!!!!!!!!!!! + # Construct and add SFO term and add SFO parameters if needed + if(spec[[varname]]$type == "SFO") { + # From p. 53 of the FOCUS kinetics report + k_term <- paste("k", new_boxes[[1]], sep="_") + nonlinear_term <- paste(k_term, new_boxes[[1]], sep=" * ") + spec[[varname]]$nlt<-nonlinear_term + new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) + new_parms <- k_term + ff <- vector() + } + + # Construct and add FOMC term and add FOMC parameters if needed + if(spec[[varname]]$type == "FOMC") { + if(match(varname, obs_vars) != 1) { + stop("Type FOMC is only possible for the first compartment, which is assumed to be the source compartment") + } +# if(spec[[varname]]$sink == FALSE) { +# stop("Turning off the sink for the FOMC model is not implemented") +# } + # From p. 53 of the FOCUS kinetics report + nonlinear_term <- paste("(alpha/beta) * ((time/beta) + 1)^-1 *", new_boxes[[1]]) + spec[[varname]]$nlt<-nonlinear_term + new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) + new_parms <- c("alpha", "beta") + ff <- vector() + } + + # Construct and add DFOP term and add DFOP parameters if needed + if(spec[[varname]]$type == "DFOP") { + if(match(varname, obs_vars) != 1) { + stop("Type DFOP is only possible for the first compartment, which is assumed to be the source compartment") + } +# if(spec[[varname]]$sink == FALSE) { +# stop("Turning off the sink for the DFOP model is not implemented") +# } + # From p. 57 of the FOCUS kinetics report + nonlinear_term <- paste("((k1 * g * exp(-k1 * time) + k2 * (1 - g) * exp(-k2 * time)) / (g * exp(-k1 * time) + (1 - g) * exp(-k2 * time))) *", new_boxes[[1]]) + spec[[varname]]$nlt<-nonlinear_term + new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) + new_parms <- c("k1", "k2", "g") + ff <- vector() + } + + # Construct and add HS term and add HS parameters if needed + if(spec[[varname]]$type == "HS") { + if(match(varname, obs_vars) != 1) { + stop("Type HS is only possible for the first compartment, which is assumed to be the source compartment") + } +# if(spec[[varname]]$sink == FALSE) { +# stop("Turning off the sink for the HS model is not implemented") +# } + # From p. 55 of the FOCUS kinetics report +# nonlinear_term <- paste("ifelse(time <= tb, k1, k2)", "*", new_boxes[[1]]) + nonlinear_term <- paste("((k1 - k2) / ( 1 + exp( 10*(time - tb) ) ) + k2)", "*", new_boxes[[1]]) + spec[[varname]]$nlt<-nonlinear_term + new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) + new_parms <- c("k1", "k2", "tb") + ff <- vector() + } + + # Construct terms for transfer to sink and add if appropriate + + #@@@@ REMOVE THIS ????? + if(spec[[varname]]$sink) { + # Add first-order sink term to first (or only) box for SFO and SFORB +# if(spec[[varname]]$type %in% c("SFO", "SFORB")) { + if(spec[[varname]]$type == "SFORB") { + k_compound_sink <- paste("k", new_boxes[[1]], "sink", sep="_") + sink_term <- paste("-", k_compound_sink, "*", new_boxes[[1]]) + new_diffs[[1]] <- paste(new_diffs[[1]], sink_term) + new_parms <- k_compound_sink + } + } + + # Add reversible binding if appropriate + if(spec[[varname]]$type == "SFORB") { + k_free_bound <- paste("k", varname, "free", "bound", sep="_") + k_bound_free <- paste("k", varname, "bound", "free", sep="_") + reversible_binding_terms <- c( + paste("-", k_free_bound, "*", new_boxes[[1]], "+", k_bound_free, "*", new_boxes[[2]]), + paste("+", k_free_bound, "*", new_boxes[[1]], "-", k_bound_free, "*", new_boxes[[2]])) + new_diffs <- paste(new_diffs, reversible_binding_terms) + new_parms <- c(new_parms, k_free_bound, k_bound_free) + } + + # Add observed variable to model + parms <- c(parms, new_parms) + names(new_diffs) <- new_boxes + diffs <- c(diffs, new_diffs) + } + # Transfer between compartments + for (varname in obs_vars) { + to <- spec[[varname]]$to + if(!is.null(to)) { + origin_box <- switch(spec[[varname]]$type, + SFO = varname, + FOMC = varname, + DFOP = varname, + HS = varname, + SFORB = paste(varname, "free", sep="_")) + fraction_left <- NULL + for (target in to) { + target_box <- switch(spec[[target]]$type, + SFO = target, + SFORB = paste(target, "free", sep="_")) + # SFO is no longer special + #if(spec[[varname]]$type %in% c("SFO", "SFORB")) { + if(spec[[varname]]$type == "SFORB") { + k_from_to <- paste("k", origin_box, target_box, sep="_") + diffs[[origin_box]] <- paste(diffs[[origin_box]], "-", + k_from_to, "*", origin_box) + diffs[[target_box]] <- paste(diffs[[target_box]], "+", + k_from_to, "*", origin_box) + parms <- c(parms, k_from_to) + } + # Handle SFO like the others +# if(spec[[varname]]$type %in% c("FOMC", "DFOP", "HS")) { + if(spec[[varname]]$type %in% c("FOMC", "DFOP", "HS", "SFO")) { + if ( length(to)==1 && !spec[[varname]]$sink ) { + # There is only one output, so no need for any flow fractions. Just add the whole flow from the parent + diffs[[target_box]] <- paste(diffs[[target_box]], "+", spec[[varname]]$nlt) + } else { + fraction_to_target = paste("f", varname, "to", target, sep="_") + fraction_not_to_target = paste("(1 - ", fraction_to_target, ")", sep="") + if(is.null(fraction_left)) { + fraction_really_to_target = fraction_to_target + fraction_left = fraction_not_to_target + } else { + # If this is the last output and there is no sink, it gets what's left + if ( target==tail(to,1) && !spec[[varname]]$sink ) { + fraction_really_to_target = fraction_left + } else { +# (1-fa)fb version +# fraction_really_to_target = paste(fraction_left, " * ", fraction_to_target, sep="") +# fraction_left = paste(fraction_left, " * ", fraction_not_to_target, sep="") +# fb version + fraction_really_to_target = fraction_to_target + fraction_left = paste(fraction_left, " - ", fraction_to_target, sep="") + } + } + ff[target_box] = fraction_really_to_target + diffs[[target_box]] <- paste(diffs[[target_box]], "+", ff[target_box], "*", spec[[varname]]$nlt) + # Add the flow fraction parameter (if it exists) + if ( target!=tail(to,1) || spec[[varname]]$sink ) { + parms <- c(parms, fraction_to_target) + } + } + } + } + } + } + model <- list(diffs = diffs, parms = parms, map = map) + + # Create coefficient matrix if appropriate + if (mat) { + boxes <- names(diffs) + n <- length(boxes) + m <- matrix(nrow=n, ncol=n, dimnames=list(boxes, boxes)) + for (from in boxes) { + for (to in boxes) { + if (from == to) { + #@@@@ OMIT NEXT LINE? !!!!! + k.candidate = paste("k", from, c(boxes, "sink"), sep="_") + k.candidate = sub("free.*bound", "free_bound", k.candidate) + k.candidate = sub("bound.*free", "bound_free", k.candidate) + k.effective = intersect(model$parms, k.candidate) + m[from,to] = ifelse(length(k.effective) > 0, + paste("-", k.effective, collapse = " "), "0") + } else { + k.candidate = paste("k", from, to, sep="_") + k.candidate = sub("free.*bound", "free_bound", k.candidate) + k.candidate = sub("bound.*free", "bound_free", k.candidate) + k.effective = intersect(model$parms, k.candidate) + m[to, from] = ifelse(length(k.effective) > 0, + k.effective, "0") + } + } + } + model$coefmat <- m + } + + if (exists("ff")) model$ff = ff + class(model) <- "mkinmod" + invisible(model) +} diff --git a/CakeNullPlot.R b/CakeNullPlot.R new file mode 100644 index 0000000..3eacd67 --- /dev/null +++ b/CakeNullPlot.R @@ -0,0 +1,127 @@ +#$Id: CakeNullPlot.R 216 2011-07-05 14:35:03Z nelr $ +# Generates model curves so the GUI can plot them. Used when all params are fixed so there was no fit +# The CAKE R modules are based on mkin +# Based on code in IRLSkinfit +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Author: Rob Nelson (Tessella) +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .” + +CakeNullPlot <- function(model, parms.ini, state.ini, observed, xlim = range(observed$time), digits = max(3, getOption("digits") - 3), ...) +{ + # Print the fixed values + fixed <- data.frame(value = c(state.ini, parms.ini)) + fixed$type = c(rep("state", length(state.ini)), rep("deparm", length(parms.ini))) + cat("\nFixed parameter values:\n") + print(fixed) + + # Print disappearence times + obs_vars = unique(as.character(observed$name)) + distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), + DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) + for (obs_var in obs_vars) { + type = names(model$map[[obs_var]])[1] + if(exists("DT50")) distimes[obs_var, ] = c(DT50, DT90) + distimes[obs_var, ] = CakeDT(type,obs_var,parms.ini) + } + cat("\nEstimated disappearance times:\n") + print(distimes, digits=digits,...) + + # Plot the model + + outtimes <- seq(0, xlim[2], length.out=101) + + odeini <- state.ini + odenames <- model$parms + odeparms <- params.ini + + # Solve the system + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } + + mod_vars <- names(model$diffs) + mkindiff <- function(t, state, parms) { + time <- t + diffs <- vector() + for (box in mod_vars) { + diffname <- paste("d", box, sep = "_") + diffs[diffname] <- with(as.list(c(time, state, parms)), + eval(parse(text = model$diffs[[box]]))) + } + return(list(c(diffs))) + } + + # Ensure initial state is at time 0 + obstimes <- unique(c(0,observed$time)) + out <- ode( + y = odeini, + times = obstimes, + func = mkindiff, + parms = odeparms, + ) + + # Output transformation for models with unobserved compartments like SFORB + out_transformed <- data.frame(time = out[,"time"]) + for (var in names(model$map)) { + if(length(model$map[[var]]) == 1) { + out_transformed[var] <- out[, var] + } else { + out_transformed[var] <- rowSums(out[, model$map[[var]]]) + } + } + + predicted_long <- mkin_wide_to_long(out_transformed,time="time") + # Calculate chi2 error levels according to FOCUS (2006) + errmin<-CakeChi2(observed, predicted_long, obs_vars, c(), c()) + cat("\nChi2 error levels in percent:\n") + errmin$err.min <- 100 * errmin$err.min + print(errmin, digits=digits,...) + + + # Fit to data + data<-observed + data$err<-rep(NA,length(data$time)) + data<-merge(data, predicted_long, by=c("time","name")) + names(data)<-c("time", "variable", "observed","err-var", "predicted") + data$residual<-data$observed-data$predicted + data<-data[order(data$variable,data$time),] + cat("\nData:\n") + print(format(data, digits = digits, scientific = FALSE,...), row.names = FALSE) + + + out <- ode( + y = odeini, + times = outtimes, + func = mkindiff, + parms = odeparms, + ) + + # Output transformation for models with unobserved compartments like SFORB + out_transformed <- data.frame(time = out[,"time"]) + for (var in names(model$map)) { + if(length(model$map[[var]]) == 1) { + out_transformed[var] <- out[, var] + } else { + out_transformed[var] <- rowSums(out[, model$map[[var]]]) + } + } + + cat("\n\n") + print(out_transformed) + cat("\n") +} + diff --git a/CakeOlsFit.R b/CakeOlsFit.R new file mode 100644 index 0000000..75ac471 --- /dev/null +++ b/CakeOlsFit.R @@ -0,0 +1,326 @@ +# Originally: mkinfit.R 87 2010-12-09 07:31:59Z jranke $ + +# Based on code in mkinfit +# Portions Johannes Ranke 2010 +# Contact: mkin-devel@lists.berlios.de +# The summary function is an adapted and extended version of summary.modFit +# from the FME package, v 1.1 by Soetart and Petzoldt, which was in turn +# inspired by summary.nls.lm + +#$Id: CakeOlsFit.R 216 2011-07-05 14:35:03Z nelr $ +# This version has been modified to expect SFO parameterised as k and flow fractions +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Authors: Rob Nelson, Richard Smith +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .” + +CakeOlsFit <- function(mkinmod, observed, + parms.ini = rep(0.1, length(mkinmod$parms)), + state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), + lower = 0, upper = Inf, + fixed_parms = NULL, + fixed_initials = names(mkinmod$diffs)[-1], + eigen = TRUE, + plot = FALSE, quiet = FALSE, + err = NULL, weight = "none", scaleVar = FALSE, + atol = 1e-6, + ...) +{ + mod_vars <- names(mkinmod$diffs) + # Subset dataframe with mapped (modelled) variables + observed <- subset(observed, name %in% names(mkinmod$map)) + # Get names of observed variables + obs_vars = unique(as.character(observed$name)) + + # Name the parameters if they are not named yet + if(is.null(names(parms.ini))) names(parms.ini) <- mkinmod$parms + + # Name the inital parameter values if they are not named yet + if(is.null(names(state.ini))) names(state.ini) <- mod_vars + + # Parameters to be optimised + parms.fixed <- parms.ini[fixed_parms] + optim_parms <- setdiff(names(parms.ini), fixed_parms) + parms.optim <- parms.ini[optim_parms] + + state.ini.fixed <- state.ini[fixed_initials] + optim_initials <- setdiff(names(state.ini), fixed_initials) + state.ini.optim <- state.ini[optim_initials] + state.ini.optim.boxnames <- names(state.ini.optim) + if(length(state.ini.optim) > 0) { + names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") + } + + # Decide if the solution of the model can be based on a simple analytical + # formula, the spectral decomposition of the matrix (fundamental system) + # or a numeric ode solver from the deSolve package + if (length(mkinmod$map) == 1) { + solution = "analytical" + } else { + if (is.matrix(mkinmod$coefmat) & eigen) solution = "eigen" + else solution = "deSolve" + } + + # Create a function calculating the differentials specified by the model + # if necessary + if(solution == "deSolve") { + mkindiff <- function(t, state, parms) { + time <- t + diffs <- vector() + for (box in mod_vars) + { + diffname <- paste("d", box, sep="_") + diffs[diffname] <- with(as.list(c(time,state, parms)), + eval(parse(text=mkinmod$diffs[[box]]))) + } + return(list(c(diffs))) + } + } + + cost.old <- 1e100 + calls <- 0 + out_predicted <- NA + # Define the model cost function + cost <- function(P) + { + assign("calls", calls+1, inherits=TRUE) + if(length(state.ini.optim) > 0) { + odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) + names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) + } else odeini <- state.ini.fixed + + odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed) + + # Ensure initial state is at time 0 + outtimes = unique(c(0,observed$time)) + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } + + # Solve the system + if (solution == "analytical") { + parent.type = names(mkinmod$map[[1]])[1] + parent.name = names(mkinmod$diffs)[[1]] + o <- switch(parent.type, + SFO = SFO.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, sep="_"))), +# evalparse("k")), +# evalparse(paste("k", parent.name, "sink", sep="_"))), + FOMC = FOMC.solution(outtimes, + evalparse(parent.name), + evalparse("alpha"), evalparse("beta")), + DFOP = DFOP.solution(outtimes, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("g")), + HS = HS.solution(outtimes, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("tb")), + SFORB = SFORB.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, "bound", sep="_")), + evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), + evalparse(paste("k", parent.name, "sink", sep="_"))) + ) + out <- cbind(outtimes, o) + dimnames(out) <- list(outtimes, c("time", sub("_free", "", parent.name))) + } + if (solution == "eigen") { + coefmat.num <- matrix(sapply(as.vector(mkinmod$coefmat), evalparse), + nrow = length(mod_vars)) + e <- eigen(coefmat.num) + c <- solve(e$vectors, odeini) + f.out <- function(t) { + e$vectors %*% diag(exp(e$values * t), nrow=length(mod_vars)) %*% c + } + o <- matrix(mapply(f.out, outtimes), + nrow = length(mod_vars), ncol = length(outtimes)) + dimnames(o) <- list(mod_vars, outtimes) + out <- cbind(time = outtimes, t(o)) + } + if (solution == "deSolve") + { + out <- ode( + y = odeini, + times = outtimes, + func = mkindiff, + parms = odeparms, + atol = atol + ) + } + + # Output transformation for models with unobserved compartments like SFORB + out_transformed <- data.frame(time = out[,"time"]) + for (var in names(mkinmod$map)) { + if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { + out_transformed[var] <- out[, var] + } else { + out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) + } + } + assign("out_predicted", out_transformed, inherits=TRUE) + + mC <- CakeCost(out_transformed, observed, y = "value", + err = err, weight = weight, scaleVar = scaleVar) + mC$penalties <- CakePenalties(odeparms, out_transformed, observed) + mC$model <- mC$cost + mC$penalties; + if (mC$model < cost.old) { + if (!quiet) + cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, + "\n") + + # Plot the data and current model output if requested + if(plot) { + outtimes_plot = seq(min(observed$time), max(observed$time), length.out=100) + if (solution == "analytical") { + o_plot <- switch(parent.type, + SFO = SFO.solution(outtimes_plot, + evalparse(parent.name), + evalparse(paste("k", parent.name, sep="_"))), +# evalparse(paste("k", parent.name, "sink", sep="_"))), + FOMC = FOMC.solution(outtimes_plot, + evalparse(parent.name), + evalparse("alpha"), evalparse("beta")), + DFOP = DFOP.solution(outtimes_plot, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("g")), + HS = HS.solution(outtimes_plot, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("tb")), + SFORB = SFORB.solution(outtimes_plot, + evalparse(parent.name), + evalparse(paste("k", parent.name, "bound", sep="_")), + evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), + evalparse(paste("k", parent.name, "sink", sep="_"))) + ) + out_plot <- cbind(outtimes_plot, o_plot) + dimnames(out_plot) <- list(outtimes_plot, c("time", sub("_free", "", parent.name))) + } + if(solution == "eigen") { + o_plot <- matrix(mapply(f.out, outtimes_plot), + nrow = length(mod_vars), ncol = length(outtimes_plot)) + dimnames(o_plot) <- list(mod_vars, outtimes_plot) + out_plot <- cbind(time = outtimes_plot, t(o_plot)) + } + if (solution == "deSolve") { + out_plot <- ode( + y = odeini, + times = outtimes_plot, + func = mkindiff, + parms = odeparms) + } + out_transformed_plot <- data.frame(time = out_plot[,"time"]) + for (var in names(mkinmod$map)) { + if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { + out_transformed_plot[var] <- out_plot[, var] + } else { + out_transformed_plot[var] <- rowSums(out_plot[, mkinmod$map[[var]]]) + } + } + out_transformed_plot <<- out_transformed_plot + + plot(0, type="n", + xlim = range(observed$time), ylim = range(observed$value, na.rm=TRUE), + xlab = "Time", ylab = "Observed") + col_obs <- pch_obs <- 1:length(obs_vars) + names(col_obs) <- names(pch_obs) <- obs_vars + for (obs_var in obs_vars) { + points(subset(observed, name == obs_var, c(time, value)), + pch = pch_obs[obs_var], col = col_obs[obs_var]) + } + matlines(out_transformed_plot$time, out_transformed_plot[-1]) + legend("topright", inset=c(0.05, 0.05), legend=obs_vars, + col=col_obs, pch=pch_obs, lty=1:length(pch_obs)) + } + + assign("cost.old", mC$model, inherits=TRUE) + } + # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost + mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res) + + return(mC) + } + fit <-modFit(cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, ...) + + # We need to return some more data for summary and plotting + fit$solution <- solution + if (solution == "eigen") { + fit$coefmat <- mkinmod$coefmat + } + if (solution == "deSolve") { + fit$mkindiff <- mkindiff + } + if (plot == TRUE) { + fit$out_transformed_plot = out_transformed_plot + } + + # We also need various other information for summary and plotting + fit$map <- mkinmod$map + fit$diffs <- mkinmod$diffs + + # mkin_long_to_wide does not handle ragged data +# fit$observed <- mkin_long_to_wide(observed) + fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") + names(fit$observed) <- c("time", as.vector(unique(observed$name))) + + predicted_long <- mkin_wide_to_long(out_predicted, time = "time") + fit$predicted <- out_predicted + + # Collect initial parameter values in two dataframes + fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) + fit$start$type = c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim))) + fit$start$lower <- lower + fit$start$upper <- upper + + fit$fixed <- data.frame( + value = c(state.ini.fixed, parms.fixed)) + fit$fixed$type = c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed))) + +# # Calculate chi2 error levels according to FOCUS (2006) + fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) + + # Calculate dissipation times DT50 and DT90 and formation fractions + parms.all = c(fit$par, parms.fixed) + fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), + row.names = obs_vars) + fit$ff <- vector() + fit$SFORB <- vector() + for (obs_var in obs_vars) { + type = names(mkinmod$map[[obs_var]])[1] + + fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all) + } + + fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed) + + # Collect observed, predicted and residuals + data<-observed + data$err<-rep(NA,length(data$time)) + data <- merge(data, predicted_long, by = c("time", "name")) + names(data)<-c("time", "variable", "observed","err-var", "predicted") + data$residual <- data$observed - data$predicted + data$variable <- ordered(data$variable, levels = obs_vars) + fit$data <- data[order(data$variable, data$time), ] + fit$atol <- atol + + class(fit) <- c("CakeFit", "mkinfit", "modFit") + return(fit) +} + diff --git a/CakeOlsPlot.R b/CakeOlsPlot.R new file mode 100644 index 0000000..199cc28 --- /dev/null +++ b/CakeOlsPlot.R @@ -0,0 +1,117 @@ +#$Id: CakeOlsPlot.R 216 2011-07-05 14:35:03Z nelr $ +# Generates fitted curves so the GUI can plot them +# Based on code in IRLSkinfit +# Author: Rob Nelson (Tessella) +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Tessella Project Reference: 6245 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .” + +CakeOlsPlot <- function(fit, xlim = range(fit$data$time), ...) +{ + cat("\n") + + solution = fit$solution + if ( is.null(solution) ) { + solution <- "deSolve" + } + atol = fit$atol + if ( is.null(atol) ) { + atol = 1.0e-6 + } + + fixed <- fit$fixed$value + names(fixed) <- rownames(fit$fixed) + parms.all <- c(fit$par, fixed) + ininames <- c( + rownames(subset(fit$start, type == "state")), + rownames(subset(fit$fixed, type == "state"))) + odeini <- parms.all[ininames] + names(odeini) <- names(fit$diffs) + + outtimes <- seq(0, xlim[2], length.out=101) + + odenames <- c( + rownames(subset(fit$start, type == "deparm")), + rownames(subset(fit$fixed, type == "deparm"))) + odeparms <- parms.all[odenames] + + # Solve the system + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } + if (solution == "analytical") { + parent.type = names(fit$map[[1]])[1] + parent.name = names(fit$diffs)[[1]] + o <- switch(parent.type, + SFO = SFO.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, sep="_"))), + FOMC = FOMC.solution(outtimes, + evalparse(parent.name), + evalparse("alpha"), evalparse("beta")), + DFOP = DFOP.solution(outtimes, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("g")), + HS = HS.solution(outtimes, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("tb")), + SFORB = SFORB.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, "free_bound", sep="_")), + evalparse(paste("k", parent.name, "bound_free", sep="_")), + evalparse(paste("k", parent.name, "free_sink", sep="_"))) + ) + out <- cbind(outtimes, o) + dimnames(out) <- list(outtimes, c("time", parent.name)) + } + if (solution == "eigen") { + coefmat.num <- matrix(sapply(as.vector(fit$coefmat), evalparse), + nrow = length(odeini)) + e <- eigen(coefmat.num) + c <- solve(e$vectors, odeini) + f.out <- function(t) { + e$vectors %*% diag(exp(e$values * t), nrow=length(odeini)) %*% c + } + o <- matrix(mapply(f.out, outtimes), + nrow = length(odeini), ncol = length(outtimes)) + dimnames(o) <- list(names(odeini), NULL) + out <- cbind(time = outtimes, t(o)) + } + if (solution == "deSolve") { + out <- ode( + y = odeini, + times = outtimes, + func = fit$mkindiff, + parms = odeparms, + atol = atol + ) + } + + # Output transformation for models with unobserved compartments like SFORB + out_transformed <- data.frame(time = out[,"time"]) + for (var in names(fit$map)) { + if(length(fit$map[[var]]) == 1) { + out_transformed[var] <- out[, var] + } else { + out_transformed[var] <- rowSums(out[, fit$map[[var]]]) + } + } + print(out_transformed) + + cat("\n") +} diff --git a/CakePenalties.R b/CakePenalties.R new file mode 100644 index 0000000..3b2a0df --- /dev/null +++ b/CakePenalties.R @@ -0,0 +1,56 @@ +# $Id: CakePenalties.R 216 2011-07-05 14:35:03Z nelr $ +# The CAKE R modules are based on mkin +# CAKE (6245), by Tessella, for Syngenta: Copyright (C) 2011 Syngenta +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .” + +# Penalty function for fits that should not be accepted +CakePenalties<-function(params, modelled, obs, penalty.scale = 10, ...){ + # Because the model cost is roughly proportional to points, + # make the penalties scale that way too for consistent behaviour + # with many or few data points + sum(CakePenaltiesLong(params, modelled, obs, penalty.scale, ...)$value) * dim(obs)[[1]] +} +CakePenaltiesLong<-function(params, modelled, obs, penalty.scale = 10, ...){ + r<-data.frame(name=character(0), value=numeric(0)) + + # Flow fractions > 1 + #------------------------------- + fails<-CakePenaltiesFF(params); + if(dim(fails)[[1]] > 0){ + #print("Penalty failures:") + #print(fails) + # Add a failure record for each flow fraction that missed + names(fails)<-c('name', 'value') + fails$value = 100*(fails$value - 1) + fails$name = lapply(fails$name, function(x) paste("FF_", x, sep="")) + r <- rbind(r, fails) + #totalPenalties <- totalPenalties + 100*sum(fails$x - 1) # Penalty for failure + } + + r$value <- r$value * penalty.scale + return(r) +} + +CakePenaltiesFF<-function(params){ + ff.values<-params["f_"==substr(names(params), 0, 2)] # Flow fractions + if(length(ff.values) > 0){ + ffs<-data.frame(t(sapply(strsplit(names(ff.values), '_'), FUN=function(x){ x[c(2,4)] }))) # Split into source/dest + names(ffs)<-c('source', 'dest') + ffs['value'] <- ff.values # Add values + sums<-aggregate(ffs$value, list(s=ffs$source), sum) # Sum of flows from each source + fails<-sums[sums$x>1.00001,] # All compartments > 1 + } else { fails<-data.frame(s=character(0), x=numeric(0)) } + return(fails) +} \ No newline at end of file diff --git a/CakeSummary.R b/CakeSummary.R new file mode 100644 index 0000000..ec42e75 --- /dev/null +++ b/CakeSummary.R @@ -0,0 +1,301 @@ +# $Id: CakeSummary.R 216 2011-07-05 14:35:03Z nelr $ +# CakeSummary: Functions to calculate statistics used in the summary, +# and display the summary itself. + +# Parts modified from package mkin +# Parts Tessella (Rob Nelson/Richard Smith) for Syngenta: Copyright (C) 2011 Syngenta +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .” + +# Calculate the decay times for a compartment +# Arguments +# type - type of compartment +# obs_var - compartment name +# parms.all - list of parameters +CakeDT<-function(type, obs_var, parms.all) { + if (type == "SFO") { + # Changed to new parameterisation + #k_names = grep(paste("k", obs_var, sep="_"), names(parms.all), value=TRUE) + k_name = paste("k", obs_var, sep="_") + k_tot = parms.all[k_name] + DT50 = log(2)/k_tot + DT90 = log(10)/k_tot + } + if (type == "FOMC") { + alpha = parms.all["alpha"] + beta = parms.all["beta"] + DT50 = beta * (2^(1/alpha) - 1) + DT90 = beta * (10^(1/alpha) - 1) + } + if (type == "DFOP") { + k1 = parms.all["k1"] + k2 = parms.all["k2"] + g = parms.all["g"] + f <- function(t, x) { + fraction <- g * exp( - k1 * t) + (1 - g) * exp( - k2 * t) + (fraction - (1 - x/100))^2 + } + DTmax <- 1000 + DT50.o <- optimize(f, c(0.001, DTmax), x=50)$minimum + DT50 = ifelse(DTmax - DT50.o < 0.1, NA, DT50.o) + DT90.o <- optimize(f, c(0.001, DTmax), x=90)$minimum + DT90 = ifelse(DTmax - DT90.o < 0.1, NA, DT90.o) + } + if (type == "HS") { + k1 = parms.all["k1"] + k2 = parms.all["k2"] + tb = parms.all["tb"] + DTx <- function(x) { + DTx.a <- (log(100/(100 - x)))/k1 + DTx.b <- tb + (log(100/(100 - x)) - k1 * tb)/k2 + if (DTx.a < tb) DTx <- DTx.a + else DTx <- DTx.b + return(DTx) + } + DT50 <- DTx(50) + DT90 <- DTx(90) + } + if (type == "SFORB") { + # FOCUS kinetics (2006), p. 60 f + k_out_names = grep(paste("k", obs_var, "free", sep="_"), names(parms.all), value=TRUE) + k_out_names = setdiff(k_out_names, paste("k", obs_var, "free", "bound", sep="_")) + k_1output = sum(parms.all[k_out_names]) + k_12 = parms.all[paste("k", obs_var, "free", "bound", sep="_")] + k_21 = parms.all[paste("k", obs_var, "bound", "free", sep="_")] + + sqrt_exp = sqrt(1/4 * (k_12 + k_21 + k_1output)^2 + k_12 * k_21 - (k_12 + k_1output) * k_21) + b1 = 0.5 * (k_12 + k_21 + k_1output) + sqrt_exp + b2 = 0.5 * (k_12 + k_21 + k_1output) - sqrt_exp + + SFORB_fraction = function(t) { + ((k_12 + k_21 - b1)/(b2 - b1)) * exp(-b1 * t) + + ((k_12 + k_21 - b2)/(b1 - b2)) * exp(-b2 * t) + } + f_50 <- function(t) (SFORB_fraction(t) - 0.5)^2 + max_DT <- 1000 + DT50.o <- optimize(f_50, c(0.01, max_DT))$minimum + if (abs(DT50.o - max_DT) < 0.01) DT50 = NA else DT50 = DT50.o + f_90 <- function(t) (SFORB_fraction(t) - 0.1)^2 + DT90.o <- optimize(f_90, c(0.01, 1000))$minimum + if (abs(DT90.o - max_DT) < 0.01) DT90 = NA else DT90 = DT90.o + } + ret<-c(DT50, DT90) + names(ret)<-c("DT50","DT90") + ret +} + +# Compute chi2 etc +# Arguments +# observed - data.frame of observed data +# predicted_long - fitted values etc +# obs_vars - compartment names +# parms.optim - list of fitted parameters +# state,ini.optim - list of fitted initial values +# +CakeChi2<-function(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) { + # Calculate chi2 error levels according to FOCUS (2006) + means <- aggregate(value ~ time + name, data = observed, mean, na.rm=TRUE) + errdata <- merge(means, predicted_long, by = c("time", "name"), suffixes = c("_mean", "_pred")) + errdata <- errdata[order(errdata$time, errdata$name), ] + errmin.overall <- mkinerrmin(errdata, length(parms.optim) + length(state.ini.optim)) + + errmin <- data.frame(err.min = errmin.overall$err.min, + n.optim = errmin.overall$n.optim, df = errmin.overall$df) + rownames(errmin) <- "All data" + for (obs_var in obs_vars) + { + errdata.var <- subset(errdata, name == obs_var) + n.k.optim <- length(grep(paste("k", obs_var, sep="_"), names(parms.optim))) + n.initials.optim <- length(grep(paste(obs_var, ".*", "_0", sep=""), names(state.ini.optim))) + n.optim <- n.k.optim + n.initials.optim + if ("alpha" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("beta" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("k1" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("k2" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("g" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("tb" %in% names(parms.optim)) n.optim <- n.optim + 1 + errmin.tmp <- mkinerrmin(errdata.var, n.optim) + errmin[obs_var, c("err.min", "n.optim", "df")] <- errmin.tmp + } + errmin +} + +# Compute additional statistics (rē/efficiency) of obs v pred for each compartment and the whole problem +CakeAdditionalStats<-function(obsvspred){ + agg<-aggregate(obsvspred[c('observed', 'err-var', 'predicted', 'residual')], list(name=obsvspred$variable), function(x){x}, simplify=FALSE) + frames<-apply(agg, 1, as.data.frame); + names(frames)<-agg$name; + frames$all<-obsvspred + + t(sapply(frames, function(frame){ + # This function takes a data frame for a single variable and makes the stats for it + # rē: FOCUS eq 6.5 p. 97 + # Efficiency: eq 6.6 p. 99 + do <- frame$observed - mean(frame$observed) + dp <- frame$predicted - mean(frame$predicted) + r2 <- ( sum(do * dp) / sqrt(sum(do^2) * sum(dp^2)) ) ^ 2 + eff <- 1 - ( sum((frame$predicted - frame$observed)^2) / sum(do^2) ) + list(r2=r2, eff=eff) + })) +} + +# Summarise a fit - used for CakeIrlsFit and CakeOlsFit +summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, ff = TRUE, cov = FALSE,...) { + param <- object$par + pnames <- names(param) + p <- length(param) + covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance + + rdf <- object$df.residual + resvar <- object$ssr / rdf + modVariance <- object$ssr / length(object$residuals) + + if (!is.numeric(covar)) { +# message <- "Cannot estimate covariance; system is singular" +# warning(message) +# covar <- matrix(data = NA, nrow = p, ncol = p) + covar=NULL + } else { + message <- "ok" + rownames(covar) <- colnames(covar) <-pnames + se <- sqrt(diag(covar) * resvar) + names(se) <- pnames + tval <- param / se + } + + if (!all(object$start$lower >=0)) { + message <- "Note that the one-sided t-test may not be appropriate if + parameter values below zero are possible." + warning(message) + } else message <- "ok" + + if(cov && !is.null(covar)) { + alpha <- 0.05 + lci <- param + qt(alpha/2,rdf)*se + uci <- param + qt(1-alpha/2,rdf)*se + param <- cbind(param, se, tval, pt(tval, rdf, lower.tail = FALSE), lci, uci) + dimnames(param) <- list(pnames, c("Estimate", "Std. Error", + "t value", "Pr(>t)", "Lower CI", "Upper CI")) + ans <- list(residuals = object$residuals, + residualVariance = resvar, + sigma = sqrt(resvar), + modVariance = modVariance, + df = c(p, rdf), cov.unscaled = covar, + cov.scaled = covar * resvar, + info = object$info, niter = object$iterations, + stopmess = message, + par = param) + } else { + param <- cbind(param) + ans <- list(residuals = object$residuals, + residualVariance = resvar, + sigma = sqrt(resvar), + modVariance = modVariance, + df = c(p, rdf), + info = object$info, niter = object$iterations, + stopmess = message, + par = param) + } + + ans$diffs <- object$diffs + if(data) { + ans$data <- object$data + ans$additionalstats = CakeAdditionalStats(object$data) + } + ans$start <- object$start + ans$fixed <- object$fixed + ans$errmin <- object$errmin + ans$penalties <- object$penalties + if(distimes) ans$distimes <- object$distimes + if(ff) ans$ff <- object$ff + if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB + class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit") + return(ans) +} + +# Print a summary. Used for CakeIrlsFit, CakeOlsFit and CakeMcmcFit +# Expanded from print.summary.modFit +print.summary.CakeFit <- function(x, digits = max(3, getOption("digits") - 3), ...) { + cat("\nEquations:\n") + print(noquote(as.character(x[["diffs"]]))) + df <- x$df + rdf <- df[2] + + cat("\nStarting values for optimised parameters:\n") + print(x$start) + + cat("\nFixed parameter values:\n") + if(length(x$fixed$value) == 0) cat("None\n") + else print(x$fixed) + + if ( !is.null(x$par) ) { + cat("\nOptimised parameters:\n") + printCoefmat(x$par, digits = digits, ...) + } + + cat("\nResidual standard error:", + format(signif(x$sigma, digits)), "on", rdf, "degrees of freedom\n") + + cat("\nChi2 error levels in percent:\n") + x$errmin$err.min <- 100 * x$errmin$err.min + print(x$errmin, digits=digits,...) + + printdistimes <- !is.null(x$distimes) + if(printdistimes){ + cat("\nEstimated disappearance times:\n") + print(x$distimes, digits=digits,...) + } + + printff <- !is.null(x$ff) + if(printff){ + cat("\nEstimated formation fractions:\n") + print(data.frame(ff = x$ff), digits=digits,...) + } + + printSFORB <- !is.null(x$SFORB) + if(printSFORB){ + cat("\nEstimated Eigenvalues of SFORB model(s):\n") + print(x$SFORB, digits=digits,...) + } + + printcor <- !is.null(x$cov.unscaled) + if (printcor){ + Corr <- cov2cor(x$cov.unscaled) + rownames(Corr) <- colnames(Corr) <- rownames(x$par) + cat("\nParameter correlation:\n") + print(Corr, digits = digits, ...) + } + + printdata <- !is.null(x$data) + if (printdata){ + cat("\nData:\n") + print(format(x$data, digits = digits, scientific = FALSE,...), row.names = FALSE) + } + + if(!is.null(x$additionalstats)){ + cat("\nAdditional Stats:\n"); + print(x$additionalstats, digits=digits, ...) + } + + if(!(is.null(x$penalties) || 0 == dim(x$penalties)[[1]])){ + cat("\nPenalties:\n"); + print(x$penalties, digits=digits, row.names = FALSE, ...) + } + + if ( !is.null(x$seed) ) { + cat("\nRandom Seed:", x$seed, "\n") + } + invisible(x) +} -- cgit v1.2.1