diff options
| author | Johannes Ranke <jranke@uni-bremen.de> | 2017-10-18 10:17:59 +0200 | 
|---|---|---|
| committer | Johannes Ranke <jranke@uni-bremen.de> | 2017-10-18 10:17:59 +0200 | 
| commit | be6d42ef636e8e1c9fdcfa6f8738ee10e885d75b (patch) | |
| tree | b676def6da66527c056e25fa5a127b97ca3a5560 | |
Version 1.4v1.4
| -rw-r--r-- | CakeCost.R | 348 | ||||
| -rw-r--r-- | CakeInit.R | 39 | ||||
| -rw-r--r-- | CakeIrlsFit.R | 147 | ||||
| -rw-r--r-- | CakeIrlsPlot.R | 25 | ||||
| -rw-r--r-- | CakeMcmcFit.R | 292 | ||||
| -rw-r--r-- | CakeModel.R | 261 | ||||
| -rw-r--r-- | CakeNullPlot.R | 127 | ||||
| -rw-r--r-- | CakeOlsFit.R | 326 | ||||
| -rw-r--r-- | CakeOlsPlot.R | 117 | ||||
| -rw-r--r-- | CakePenalties.R | 56 | ||||
| -rw-r--r-- | CakeSummary.R | 301 | 
11 files changed, 2039 insertions, 0 deletions
diff --git a/CakeCost.R b/CakeCost.R new file mode 100644 index 0000000..1cc3981 --- /dev/null +++ b/CakeCost.R @@ -0,0 +1,348 @@ +## ----------------------------------------------------------------------------- +## The model cost and residuals +## ----------------------------------------------------------------------------- +# The CAKE R modules are based on mkin,  +# Call to approx is only performed if there are multiple non NA values +# which should prevent most of the crashes - Rob Nelson (Tessella) +# +# Modifications developed by Tessella Plc for Syngenta, Copyright (C) 2011 Syngenta +# Authors: Rob Nelson, Richard Smith +# Tessella Project Reference: 6245 +# +# This program is free software: you can +# redistribute them and/or modify them under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <http://www.gnu.org/licenses/>#  +# +# $Id: CakeCost.R 216 2011-07-05 14:35:03Z nelr $ + +CakeCost <- function (model, obs, x = "time", y = NULL, err = NULL, +                     weight = "none", scaleVar = FALSE, cost = NULL, ...) { + +  ## convert vector to matrix +  if (is.vector(obs)) { +    cn <- names(obs) +    obs   <- matrix(data = obs, nrow = 1) +    colnames(obs) <- cn +  } +  if (is.vector(model)) { +    cn <- names(model) +    model <- matrix(data=model, nrow = 1) +    colnames(model) <- cn +  } + +  ##============================================================================= +  ## Observations +  ##============================================================================= + +  ## The position of independent variable(s) +  ix <- 0 +  if (! is.null(x))  {   # mapping required... +    ## For now multiple independent variables are not supported... +    if (length(x) > 1) +      stop ("multiple independent variables in 'obs' are not yet supported") + +    if (! is.character(x)) +      stop ("'x' should be the *name* of the column with the independent variable in 'obs' or NULL") +    ix  <- which(colnames(obs) %in% x) +    if (length(ix) != length(x)) +      stop(paste("Independent variable column not found in observations", x)) +  } else ix <- NULL + +  ## The position of weighing values +  ierr <- 0 +  if (! is.null(err)) { +    if (! is.character(err)) +      stop ("'err' should be the *name* of the column with the error estimates in obs or NULL") +    ierr <- which(colnames(obs) == err)    # only one +    if (length(ierr) == 0) +      stop(paste("Column with error estimates not found in observations", err)) +  } + +  ## The dependent variables +  type <- 1           # data input type: type 2 is table format, type 1 is long format... + +  if (!is.null(y)) {   # it is in table format; first column are names of observed data... + +    Names    <- as.character(unique(obs[, 1]))  # Names of data sets, all data should be model variables... +    Ndat     <- length(Names)                   # Number of data sets +    ilist    <- 1:Ndat +    if (! is.character(y)) +      stop ("'y' should be the *name* of the column with the values of the dependent variable in obs") +    iy  <- which(colnames(obs) == y) +    if (length(iy) == 0) +      stop(paste("Column with value of dependent variable not found in observations", y)) +    type <- 2 + +  } else  {             # it is a matrix, variable names are column names +    Ndat     <- NCOL(obs)-1 +    Names    <- colnames(obs) +    ilist    <- (1:NCOL(obs))        # column positions of the (dependent) observed variables +    exclude  <- ix                   # exclude columns that are not +    if (ierr > 0) +      exclude <- c(ix, ierr)          # exclude columns that are not +    if (length(exclude) > 0) +      ilist <- ilist[-exclude] +  } + +  #================================ +  # The model results +  #================================ + +  ModNames <- colnames(model)  # Names of model variables +  if (length(ix) > 1) { +    ixMod <- NULL + +    for ( i in 1:length(ix)) { +      ix2 <- which(colnames(model) == x[i]) +      if (length(ix2) == 0) +        stop(paste("Cannot calculate cost: independent variable not found in model output", x[i])) +      ixMod <- c(ixMod, ix2) +    } + +  xMod     <- model[,ixMod]    # Independent variable, model +  } else if (length(ix) == 1) { +   ixMod    <- which(colnames(model) == x) +   if (length(ixMod) == 0) +     stop(paste("Cannot calculate cost: independent variable not found in model output", x)) +   xMod     <- model[,ixMod]    # Independent variable, model +  } +  Residual <- NULL +  CostVar  <- NULL + +  #================================ +  # Compare model and data... +  #================================ +  xDat <- 0 +  iDat <- 1:nrow(obs) + +  for (i in ilist) {   # for each observed variable ... +    ii <- which(ModNames == Names[i]) +    if (length(ii) == 0) stop(paste("observed variable not found in model output", Names[i])) +    yMod <- model[,ii] +    if (type == 2)  {  # table format +      iDat <- which (obs[,1] == Names[i]) +      if (length(ix) > 0) xDat <- obs[iDat, ix] +      obsdat <- obs[iDat, iy] +    } else { +      if (length(ix) > 0) xDat <- obs[, 1] +      obsdat <- obs[,i] +    } +    ii <- which(is.na(obsdat)) +    if (length(ii) > 0) { +      xDat   <- xDat[-ii] +      obsdat <- obsdat[-ii] +    } + +    # CAKE version - Added tests for multiple non-NA values  +    if (length(ix) > 0 && length(unique(xMod[!is.na(xMod)]))>1 && length(yMod[!is.na(yMod)])>1) +    { +      ModVar <- approx(xMod, yMod, xout = xDat)$y +    } +    else { +      cat("CakeCost Warning: Only one valid point - using mean (yMod was", yMod, ")\n") +      ModVar <- mean(yMod[!is.na(yMod)]) +      obsdat <- mean(obsdat) +    } +    iex <- which(!is.na(ModVar)) +    ModVar <- ModVar[iex] +    obsdat <- obsdat[iex] +    xDat   <- xDat[iex] +    if (ierr > 0) { +      Err <- obs[iDat, ierr] +      Err <- Err[iex] +    } else { +      if (weight == "std") +        Err <- sd(obsdat) +      else if (weight == "mean") +        Err <- mean(abs(obsdat)) +      else if (weight == "none") +        Err <- 1 +      else +       stop("error: do not recognize 'weight'; should be one of 'none', 'std', 'mean'") +    } +    if (any(is.na(Err))) +      stop(paste("error: cannot estimate weighing for observed variable: ", Names[i])) +    if (min(Err) <= 0) +      stop(paste("error: weighing for observed variable is 0 or negative:", Names[i])) +    if (scaleVar) +      Scale <- 1/length(obsdat) +    else Scale <- 1 +    Res <- (ModVar- obsdat) +    res <- Res/Err +    resScaled <- res*Scale +    Residual <- rbind(Residual, +                      data.frame( +                        name   = Names[i], +                        x      = xDat, +                        obs    = obsdat, +                        mod    = ModVar, +                        weight = 1/Err, +                        res.unweighted = Res, +                        res    = res)) + +    CostVar <- rbind(CostVar, +                     data.frame( +                       name           = Names[i], +                       scale          = Scale, +                       N              = length(Res), +                       SSR.unweighted = sum(Res^2), +                       SSR.unscaled   = sum(res^2), +                       SSR            = sum(resScaled^2))) +  }  # end loop over all observed variables + +  ## SSR +  Cost  <- sum(CostVar$SSR * CostVar$scale) +   +  Lprob <- -sum(log(pmax(0, dnorm(Residual$mod, Residual$obs, Err)))) # avoid log of negative values +  #Lprob <- -sum(log(pmax(.Machine$double.xmin, dnorm(Residual$mod, Residual$obs, Err)))) #avoid log(0) + +  if (! is.null(cost)) { +    Cost     <- Cost + cost$model +    CostVar  <- rbind(CostVar, cost$var) +    Residual <- rbind(Residual, cost$residuals) +    Lprob    <- Lprob + cost$minlogp +  } +  out <- list(model = Cost, cost = Cost, minlogp = Lprob, var = CostVar, residuals = Residual) +  class(out) <- "modCost" +  return(out) +} + +## ----------------------------------------------------------------------------- +## S3 methods of modCost +## ----------------------------------------------------------------------------- + +plot.modCost<- function(x, legpos="topleft", ...) { +  nvar <- nrow(x$var) + +  dots <- list(...) + +  dots$xlab <- if(is.null(dots$xlab)) "x" else dots$xlab +  dots$ylab <- if(is.null(dots$ylab)) "weighted residuals" else dots$ylab +  DotsPch   <- if(is.null(dots$pch)) (16:24) else dots$pch +  dots$pch  <- if(is.null(dots$pch)) (16:24)[x$residuals$name] else dots$pch[x$residuals$name] +  DotsCol   <- if(is.null(dots$col)) (1:nvar) else dots$col +  dots$col  <- if(is.null(dots$col)) (1:nvar)[x$residuals$name] else dots$col[x$residuals$name] + +  do.call("plot", c(alist(x$residuals$x, x$residuals$res), dots)) + +#  plot(x$residuals$x, x$residuals$res, xlab="x", ylab="weighted residuals", +#     pch=c(16:24)[x$residuals$name],col=c(1:nvar)[x$residuals$name],...) + +  if (! is.na(legpos)) +    legend(legpos, legend = x$var$name, col = DotsCol, pch = DotsPch) +} + +## ----------------------------------------------------------------------------- +## Internal cost function for optimisers +## ----------------------------------------------------------------------------- +# Cost function. The returned structure must have $model +# Called from the middle of IrlsFit and OlsFit +# We need to preserve state between calls so make a closure +CakeInternalCostFunctions <- function(mkinmod, state.ini.optim, state.ini.optim.boxnames,  +                                    state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar,  +                                    quiet, plot=FALSE, atol=1e-6){ +    cost.old <- 1e+100 +    calls <- 0 +    out_predicted <- NA +	 +	get.predicted <- function(){ out_predicted } +	 +	get.best.cost <- function(){ cost.old } +	reset.best.cost <- function() { cost.old<<-1e+100 } +	 +	get.calls <- function(){ calls } +	set.calls <- function(newcalls){ calls <<- newcalls } +	 +	set.error<-function(err) { observed$err <<- err } +	 +	cost <- function(P) { +		assign("calls", calls + 1, inherits = TRUE) +		print(P) +		if (length(state.ini.optim) > 0) { +			odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) +			names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) +		} +		else odeini <- state.ini.fixed +		odeparms <- c(P[(length(state.ini.optim) + 1):length(P)],  +			parms.fixed) +		# Ensure initial state is at time 0 +		outtimes = unique(c(0,observed$time)) +		out <- ode(y = odeini, times = outtimes, func = mkindiff,  +			parms = odeparms, atol=atol) +		out_transformed <- data.frame(time = out[, "time"]) +		for (var in names(mkinmod$map)) { +			if (length(mkinmod$map[[var]]) == 1) { +				out_transformed[var] <- out[, var] +			} +			else { +				out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) +			} +		} +		assign("out_predicted", out_transformed, inherits = TRUE) +		mC <- CakeCost(out_transformed, observed, y = "value",  +			err = 'err', scaleVar = scaleVar) +		mC$penalties <- CakePenalties(odeparms, out_transformed, observed) +		mC$model <- mC$cost + mC$penalties; +		if (mC$model < cost.old) { +			if (!quiet)  +				cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, +				  "\n") +			if (plot) { +				outtimes_plot = seq(min(observed$time), max(observed$time),  +				  length.out = 100) +				out_plot <- ode(y = odeini, times = outtimes_plot,  +				  func = mkindiff, parms = odeparms, atol=atol) +				out_transformed_plot <- data.frame(time = out_plot[,  +				  "time"]) +				for (var in names(mkinmod$map)) { +				  if (length(mkinmod$map[[var]]) == 1) { +					out_transformed_plot[var] <- out_plot[, var] +				  } +				  else { +					out_transformed_plot[var] <- rowSums(out_plot[,  +					  mkinmod$map[[var]]]) +				  } +				} +				plot(0, type = "n", xlim = range(observed$time),  +				  ylim = range(observed$value, na.rm = TRUE),  +				  xlab = "Time", ylab = "Observed") +				col_obs <- pch_obs <- 1:length(obs_vars) +				names(col_obs) <- names(pch_obs) <- obs_vars +				for (obs_var in obs_vars) { +				  points(subset(observed, name == obs_var, c(time,  +					value)), pch = pch_obs[obs_var], col = col_obs[obs_var]) +				} +				matlines(out_transformed_plot$time, out_transformed_plot[-1]) +				legend("topright", inset = c(0.05, 0.05), legend = obs_vars,  +				  col = col_obs, pch = pch_obs, lty = 1:length(pch_obs)) +			} +			assign("cost.old", mC$model, inherits = TRUE) +		} +		# HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost +		if(mC$penalties > 0){ +			#cat("Penalty adjustment: ",mC$penalties) +			mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res) +		} +		 +		return(mC) +	} +		 + +	 +	list(cost=cost,  +		get.predicted=get.predicted, +		get.calls=get.calls, set.calls=set.calls, +		get.best.cost=get.best.cost, reset.best.cost=reset.best.cost, +		set.error=set.error +	) +}
\ No newline at end of file diff --git a/CakeInit.R b/CakeInit.R new file mode 100644 index 0000000..3e609d5 --- /dev/null +++ b/CakeInit.R @@ -0,0 +1,39 @@ +#$Id: CakeInit.R 221 2011-11-01 14:12:45Z nelr $ + +# Purpose: Load the modules required by CAKE +# Call with chdir=TRUE so it can load our source from the current directory +# Tessella Project Reference: 6245 + +# Copyright (C) 2011 Syngenta + +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License as published by +#    the Free Software Foundation, either version 3 of the License, or +#    (at your option) any later version. +#  +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +#  +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. + +library(mkin) +source("CakeModel.R") +source("CakeOlsFit.R") +source("CakeIrlsFit.R") +source("CakeOlsPlot.R") +source("CakeIrlsPlot.R") +source("CakeNullPlot.R") +source("CakeCost.R") +source("CakePenalties.R") +source("CakeMcmcFit.R") +source("CakeSummary.R") + +options(width=1000) +options(error=recover) +options(warn=-1) + +# When running from C#, this must match the C# CAKE version +CAKE.version<-"1.4" diff --git a/CakeIrlsFit.R b/CakeIrlsFit.R new file mode 100644 index 0000000..bf20572 --- /dev/null +++ b/CakeIrlsFit.R @@ -0,0 +1,147 @@ +#$Id: CakeIrlsFit.R 216 2011-07-05 14:35:03Z nelr $ +# +# The CAKE R modules are based on mkin +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Authors: Rob Nelson, Richard Smith +# Tessella Project Reference: 6245 +  +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License as published by +#    the Free Software Foundation, either version 3 of the License, or +#    (at your option) any later version. +#  +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +#  +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. +# +CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)),  +    state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0,  +    upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1],  +    plot = FALSE, quiet = FALSE, err = NULL, weight = "none",  +    scaleVar = FALSE, atol=1e-6, control=list(),...)  +{ + +### This is a modification based on the "mkinfit" function. +### version 0.1 July 20 +###  +# This version has been modified to expect SFO parameterised as k and flow fractions +# Based on code in IRLSkinfit +    NAind <-which(is.na(observed$value)) +    mod_vars <- names(mkinmod$diffs) +    observed <- subset(observed, name %in% names(mkinmod$map)) +    ERR <- rep(1,nrow(observed)) +    observed <- cbind(observed,err=ERR) +    obs_vars = unique(as.character(observed$name)) +    if (is.null(names(parms.ini)))  +        names(parms.ini) <- mkinmod$parms +    mkindiff <- function(t, state, parms) { +        time <- t +        diffs <- vector() +        for (box in mod_vars) { +            diffname <- paste("d", box, sep = "_") +            diffs[diffname] <- with(as.list(c(time, state, parms)),  +                eval(parse(text = mkinmod$diffs[[box]]))) +        } +        return(list(c(diffs))) +    } +    if (is.null(names(state.ini)))  +        names(state.ini) <- mod_vars +    parms.fixed <- parms.ini[fixed_parms] +    optim_parms <- setdiff(names(parms.ini), fixed_parms) +    parms.optim <- parms.ini[optim_parms] +    state.ini.fixed <- state.ini[fixed_initials] +    optim_initials <- setdiff(names(state.ini), fixed_initials) +    state.ini.optim <- state.ini[optim_initials] +    state.ini.optim.boxnames <- names(state.ini.optim) +    if (length(state.ini.optim) > 0) { +        names(state.ini.optim) <- paste(names(state.ini.optim),  +            "0", sep = "_") +    } +	 +	costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames,  +            state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol) +	 +    ############### Iteratively Reweighted Least Squares############# +    ## Start with no weighting +    fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower,  +                      upper = upper,control=control,...) +	 +    if(length(control)==0) +      { +        irls.control <- list(maxIter=10,tol=1e-05) +        control <- list(irls.control=irls.control) +      }else{ +        if(is.null(control$irls.control)) +          { +            irls.control <- list(maxIter=10,tol=1e-05) +            control <- list(irls.control=irls.control) +          } +      } +    irls.control <- control$irls.control +    maxIter <- irls.control$maxIter +    tol <- irls.control$tol +    #### +    niter <- 1 +    ## insure one IRLS iteration +    diffsigma <- 100 +    olderr <- rep(1,length(mod_vars)) +    while(diffsigma>tol & niter<=maxIter) +      {       +        err <- sqrt(fit$var_ms_unweighted) +        ERR <- err[as.character(observed$name)] +        costFunctions$set.error(ERR) +        diffsigma <- sum((err-olderr)^2) +        cat("IRLS iteration at",niter, "; Diff in error variance ", diffsigma,"\n") +        olderr <- err +        fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, control=control, ...) +        niter <- niter+1        +        +        ### If not converged, reweight and fit                 +      } +	   +	########################################### +    fit$mkindiff <- mkindiff +    fit$map <- mkinmod$map +    fit$diffs <- mkinmod$diffs +     +	out_predicted <- costFunctions$get.predicted() +     +    # mkin_long_to_wide does not handle ragged data +    fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") +    names(fit$observed) <- c("time", as.vector(unique(observed$name))) +     +    predicted_long <- mkin_wide_to_long(out_predicted, time = "time") +    fit$predicted <- out_predicted +    fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) +    fit$start$type = c(rep("state", length(state.ini.optim)),  +        rep("deparm", length(parms.optim))) +    fit$start$lower <- lower +    fit$start$upper <- upper +    fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) +    fit$fixed$type = c(rep("state", length(state.ini.fixed)),  +        rep("deparm", length(parms.fixed))) + +    fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) +    parms.all = c(fit$par, parms.fixed) +	fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed) +    fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)),  +        DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) +    for (obs_var in obs_vars) { +        type = names(mkinmod$map[[obs_var]])[1] +        fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all) +    } +     +    data <- merge(observed, predicted_long, by = c("time", "name")) +     +     +    names(data) <- c("time", "variable", "observed","err-var", "predicted") +    data$residual <- data$observed - data$predicted +    data$variable <- ordered(data$variable, levels = obs_vars) +    fit$data <- data[order(data$variable, data$time), ] +    class(fit) <- c("CakeFit", "mkinfit", "modFit")  +    return(fit) +} diff --git a/CakeIrlsPlot.R b/CakeIrlsPlot.R new file mode 100644 index 0000000..2de8910 --- /dev/null +++ b/CakeIrlsPlot.R @@ -0,0 +1,25 @@ +#$Id: CakeIrlsPlot.R 216 2011-07-05 14:35:03Z nelr $ +# Generates fitted curves so the GUI can plot them +# Author: Rob Nelson (Tessella) +# Tessella Project Reference: 6245 + +#    Copyright (C) 2011  Syngenta +#  +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License as published by +#    the Free Software Foundation, either version 3 of the License, or +#    (at your option) any later version. +#  +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +#  +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. + +CakeIrlsPlot <- function(fit, xlim = range(fit$data$time), ...) +{ +   CakeOlsPlot(fit, xlim, ...) +} + diff --git a/CakeMcmcFit.R b/CakeMcmcFit.R new file mode 100644 index 0000000..209f904 --- /dev/null +++ b/CakeMcmcFit.R @@ -0,0 +1,292 @@ +# $Id: CakeMcmcFit.R 216 2011-07-05 14:35:03Z nelr $ +# The CAKE R modules are based on mkin,  +# Based on mcmckinfit as modified by Bayer +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011  Syngenta +# Author: Rob Nelson +# Tessella Project Reference: 6245 + +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License as published by +#    the Free Software Foundation, either version 3 of the License, or +#    (at your option) any later version. +#  +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +#  +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. + +CakeMcmcFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)),  +    state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0,  +    upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1],  +    plot = FALSE, quiet = FALSE, err = NULL, weight = "none",  +    scaleVar = FALSE, commonsigma=FALSE,jump = NULL,prior = NULL, +    wvar0=0.1,niter = 1000,  +    outputlength = niter, burninlength = 0, updatecov = niter,  +    ntrydr = 1, drscale = NULL, verbose = TRUE,fitstart=TRUE,seed=NULL,atol=1e-6,...)  +{ + +    NAind <-which(is.na(observed$value)) +    mod_vars <- names(mkinmod$diffs) +    observed <- subset(observed, name %in% names(mkinmod$map)) +    ERR <- rep(1,nrow(observed)) +    observed <- cbind(observed,err=ERR) +    obs_vars = unique(as.character(observed$name)) +    if (is.null(names(parms.ini)))  +        names(parms.ini) <- mkinmod$parms +    mkindiff <- function(t, state, parms) { +        time <- t +        diffs <- vector() +        for (box in mod_vars) { +            diffname <- paste("d", box, sep = "_") +            diffs[diffname] <- with(as.list(c(time, state, parms)),  +                eval(parse(text = mkinmod$diffs[[box]]))) +        } +        return(list(c(diffs))) +    } +    if (is.null(names(state.ini)))  +        names(state.ini) <- mod_vars +    parms.fixed <- parms.ini[fixed_parms] +    optim_parms <- setdiff(names(parms.ini), fixed_parms) +    parms.optim <- parms.ini[optim_parms] +    state.ini.fixed <- state.ini[fixed_initials] +    optim_initials <- setdiff(names(state.ini), fixed_initials) +    state.ini.optim <- state.ini[optim_initials] +    state.ini.optim.boxnames <- names(state.ini.optim) +    if (length(state.ini.optim) > 0) { +        names(state.ini.optim) <- paste(names(state.ini.optim),  +            "0", sep = "_") +    } +    +   	costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames,  +                        state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol) +	bestIteration <- -1; +	costWithStatus <- function(P, ...){ +		r <- costFunctions$cost(P) +		if(r$cost == costFunctions$get.best.cost()) { +			bestIteration<<-costFunctions$get.calls(); +			 cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n') +		} +		cat("MCMC Call no.", costFunctions$get.calls(), '\n') +		return(r) +	} +	   +    # Set the seed +    if ( is.null(seed) ) { +      # No seed so create a random one so there is something to report +      seed = runif(1,0,2^31-1) +    } +    seed = as.integer(seed) +    set.seed(seed) +     +    if(fitstart==TRUE) +      { +     ## ############# Get Initial Paramtervalues   ############# +    ## Start with no weighting +    fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower,  +                      upper = upper,...) +	 +    if(commonsigma==TRUE){ +      #observed$err <- rep(1,nrow(observed)) +      cov0 <- summary(fit)$cov.scaled*2.4^2/length(fit$par) +	  costFunctions$set.calls(0); costFunctions$reset.best.cost() +      res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) +      #res <- modMCMC(cost,fit$par,lower=lower,upper=upper,niter=niter) +    }else{ +      ## ############## One reweighted estimation ############################  +      ## Estimate the error variance(sd)      +      tmpres <- fit$residuals +      oldERR <- observed$err +      err <- rep(NA,length(mod_vars)) +      for(i in 1:length(mod_vars)) +        { +          box <- mod_vars[i] +          ind <- which(names(tmpres)==box) +          tmp <- tmpres[ind] +          err[i] <- sd(tmp) +        } +      names(err) <- mod_vars +      ERR <- err[as.character(observed$name)] +      observed$err <-ERR +	  costFunctions$set.error(ERR) +      olderr <- rep(1,length(mod_vars)) +      diffsigma <- sum((err-olderr)^2) +      ## At least do one iteration step to get a weighted LS +      fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...) +      ## Use this as the Input for MCMC algorithm +      ## ########################## +	  fs <- summary(fit) +      cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par) +      var0 <- fit$var_ms_unweighted +	  costFunctions$set.calls(0); costFunctions$reset.best.cost() +      res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) +    +    } +   }else { +		stop('fitstart=FALSE code removed until needed') +	} +    # Construct the fit object to return +    fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) +    fit$start$type = c(rep("state", length(state.ini.optim)),  +        rep("deparm", length(parms.optim))) +    fit$start$lower <- lower +    fit$start$upper <- upper +    fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) +    fit$fixed$type = c(rep("state", length(state.ini.fixed)),  +        rep("deparm", length(parms.fixed))) + +    fit$mkindiff <- mkindiff +    fit$map <- mkinmod$map +    fit$diffs <- mkinmod$diffs + +    # mkin_long_to_wide does not handle ragged data +    fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") +    #names(fit$observed) <- c("time", as.vector(unique(observed$name))) +    fns <- function(str) { +      fields <- strsplit(str, "\\.")[[1]] +      if (fields[1] == "value") { +         return(fields[2]) +      } +      else { +         return(str) +      } +    } +    names(fit$observed) <- sapply(names(fit$observed), fns) +    +   # Replace mean from modFit with mean from modMCMC +   fnm <- function(x) mean(res$pars[,x]) +   fit$par <- sapply(dimnames(res$pars)[[2]],fnm) +   fit$bestpar <- res$bestpar +   fit$costfn <- costWithStatus +    +   # Disappearence times +   parms.all <- c(fit$par, parms.fixed) +   obs_vars = unique(as.character(observed$name)) +   fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)),  +        DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) +    for (obs_var in obs_vars) { +        type = names(mkinmod$map[[obs_var]])[1] +        fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all) +    } + +     +   # Fits to data +   odeini <- state.ini +   odeparms <- parms.all +   # If this step modelled the start amount, include it. Lookup in vectors returns NA not null if missing +   if(!is.na(odeparms["Parent_0"])) { odeini['Parent'] = odeparms["Parent_0"] } +    +   # Solve the system +   evalparse <- function(string) +   { +      eval(parse(text=string), as.list(c(odeparms, odeini))) +   } +   +   # Ensure initial state is at time 0 +   obstimes <- unique(c(0,observed$time)) + +   out <- ode( +      y = odeini, +      times = obstimes, +      func = mkindiff,  +      parms = odeparms, +    ) +     +   # Output transformation for models with unobserved compartments like SFORB +   out_transformed <- data.frame(time = out[,"time"]) +   for (var in names(mkinmod$map)) { +      if(length(mkinmod$map[[var]]) == 1) { +         out_transformed[var] <- out[, var] +      } else { +         out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) +      } +   } +   fit$predicted <- out_transformed +   fit$penalties <- CakePenaltiesLong(odeparms, out_transformed, observed) + +   predicted_long <- mkin_wide_to_long(out_transformed,time="time") +   obs_vars = unique(as.character(observed$name)) +   fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) + +   data<-observed +   data$err<-rep(NA,length(data$time)) +   data<-merge(data, predicted_long, by=c("time","name")) +   names(data)<-c("time", "variable", "observed","err-var", "predicted") +   data$residual<-data$observed-data$predicted +   data$variable <- ordered(data$variable, levels = obs_vars) +   fit$data <- data[order(data$variable, data$time), ] + +   sq <- data$residual * data$residual +   fit$ssr <- sum(sq) +    +   fit$seed = seed +    +    fit$res <- res +    class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit") +    return(fit) +} + + +# Summarise a fit +summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, ff = TRUE, cov = FALSE,...) { +  param  <- object$par +  pnames <- names(param) +  p      <- length(param) +  #covar  <- try(solve(0.5*object$hessian), silent = TRUE)   # unscaled covariance +  mcmc <- object$res +  covar <- cov(mcmc$pars) + +  rdf    <- object$df.residual +   +    message <- "ok" +    rownames(covar) <- colnames(covar) <-pnames +     +    #se     <- sqrt(diag(covar) * resvar) +    fnse <- function(x) sd(mcmc$pars[,x])	#/sqrt(length(mcmc$pars[,x])) +    se <- sapply(dimnames(mcmc$pars)[[2]],fnse) +     +    tval      <- param / se + +  if (!all(object$start$lower >=0)) { +    message <- "Note that the one-sided t-test may not be appropriate if +      parameter values below zero are possible." +    warning(message) +  } else message <- "ok" + +   # Parameter fits +    alpha <- 0.05 +    lci <- param + qt(alpha/2,rdf)*se +    uci <- param + qt(1-alpha/2,rdf)*se +   param <- cbind(param, se, tval, pt(tval, rdf, lower.tail = FALSE), lci, uci) +   dimnames(param) <- list(pnames, c("Estimate", "Std. Error", "t value", "Pr(>t)", "Lower CI", "Upper CI")) +        +   # Residuals from mean of MCMC fit +   resvar <- object$ssr/ rdf +   modVariance <- object$ssr / length(object$data$residual) +   +   ans <- list(residuals = object$data$residuals, +                residualVariance = resvar, +                sigma = sqrt(resvar), +                modVariance = modVariance, +                df = c(p, rdf), cov.unscaled = covar, +                cov.scaled = covar * resvar, +                info = object$info, niter = object$iterations, +                stopmess = message, +                par = param) +     +  ans$diffs <- object$diffs +  ans$data <- object$data +  ans$additionalstats = CakeAdditionalStats(object$data) +  ans$seed <- object$seed + +  ans$start <- object$start +  ans$fixed <- object$fixed +  ans$errmin <- object$errmin  +  if(distimes) ans$distimes <- object$distimes +  if(ff) ans$ff <- object$ff +  if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB +  class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit")  +  return(ans)   +} diff --git a/CakeModel.R b/CakeModel.R new file mode 100644 index 0000000..a93d7af --- /dev/null +++ b/CakeModel.R @@ -0,0 +1,261 @@ +# Was Id: mkinmod.R 71 2010-09-12 01:13:36Z jranke  +# $Id: CakeModel.R 216 2011-07-05 14:35:03Z nelr $ + +# The CAKE R modules are based on mkin +# Portions Johannes Ranke, 2010 +# Contact: mkin-devel@lists.berlios.de + +# This version has been modified to parameterise SFO as k and flow fractions +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011  Syngenta +# Tessella Project Reference: 6245 + +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License as published by +#    the Free Software Foundation, either version 3 of the License, or +#    (at your option) any later version. +#  +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +#  +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. + +CakeModel <- function(...) +{ +  spec <- list(...) +  obs_vars <- names(spec) + +  # The returned model will be a list of character vectors, containing +  # differential equations, parameter names and a mapping from model variables +  # to observed variables. If possible, a matrix representation of the  +  # differential equations is included +  parms <- vector() +  diffs <- vector() +  map <- list() +  if(spec[[1]]$type %in% c("FOMC", "DFOP", "HS")) { +    mat = FALSE  +    if(!is.null(spec[[1]]$to)) { +      message <- paste("Only constant formation fractions over time are implemented.", +        "Depending on the reason for the time dependence of degradation, this may be unrealistic", +        sep="\n") +      warning(message) +    } else message <- "ok" +  } else mat = TRUE +   +  mat = FALSE # XYZZY Not implemented yet assuming it should be +  nlt = list() + +  # Establish list of differential equations +  for (varname in obs_vars) +  { +    if(is.null(spec[[varname]]$type)) stop( +      "Every argument to mkinmod must be a list containing a type component") +    if(!spec[[varname]]$type %in% c("SFO", "FOMC", "DFOP", "HS", "SFORB")) stop( +      "Available types are SFO, FOMC, DFOP, HS and SFORB only") +    new_parms <- vector() + +    # New (sub)compartments (boxes) needed for the model type +    new_boxes <- switch(spec[[varname]]$type, +      SFO = varname, +      FOMC = varname, +      DFOP = varname, +      HS = varname, +      SFORB = paste(varname, c("free", "bound"), sep="_") +    ) +    map[[varname]] <- new_boxes +    names(map[[varname]]) <- rep(spec[[varname]]$type, length(new_boxes)) + +    # Start a new differential equation for each new box +    new_diffs <- paste("d_", new_boxes, " =", sep="") + +    # Turn on sink if not specified otherwise +    if(is.null(spec[[varname]]$sink)) spec[[varname]]$sink <- TRUE +     +    #@@@ADD SFO k HERE !!!!!!!!!!!!! +    # Construct and add SFO term and add SFO parameters if needed +    if(spec[[varname]]$type == "SFO") { +      # From p. 53 of the FOCUS kinetics report +      k_term <- paste("k", new_boxes[[1]], sep="_") +      nonlinear_term <- paste(k_term, new_boxes[[1]], sep=" * ") +      spec[[varname]]$nlt<-nonlinear_term +      new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) +      new_parms <- k_term +      ff <- vector() +    }  + +    # Construct and add FOMC term and add FOMC parameters if needed +    if(spec[[varname]]$type == "FOMC") { +      if(match(varname, obs_vars) != 1) { +        stop("Type FOMC is only possible for the first compartment, which is assumed to be the source compartment") +      } +#      if(spec[[varname]]$sink == FALSE) { +#        stop("Turning off the sink for the FOMC model is not implemented") +#      } +      # From p. 53 of the FOCUS kinetics report +      nonlinear_term <- paste("(alpha/beta) * ((time/beta) + 1)^-1 *", new_boxes[[1]]) +      spec[[varname]]$nlt<-nonlinear_term +      new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) +      new_parms <- c("alpha", "beta") +      ff <- vector() +    }  + +    # Construct and add DFOP term and add DFOP parameters if needed +    if(spec[[varname]]$type == "DFOP") { +      if(match(varname, obs_vars) != 1) { +        stop("Type DFOP is only possible for the first compartment, which is assumed to be the source compartment") +      } +#      if(spec[[varname]]$sink == FALSE) { +#        stop("Turning off the sink for the DFOP model is not implemented") +#      } +      # From p. 57 of the FOCUS kinetics report +      nonlinear_term <- paste("((k1 * g * exp(-k1 * time) + k2 * (1 - g) * exp(-k2 * time)) / (g * exp(-k1 * time) + (1 - g) * exp(-k2 * time))) *", new_boxes[[1]]) +      spec[[varname]]$nlt<-nonlinear_term +      new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) +      new_parms <- c("k1", "k2", "g") +      ff <- vector() +    }  + +    # Construct and add HS term and add HS parameters if needed +    if(spec[[varname]]$type == "HS") { +      if(match(varname, obs_vars) != 1) { +        stop("Type HS is only possible for the first compartment, which is assumed to be the source compartment") +      } +#      if(spec[[varname]]$sink == FALSE) { +#        stop("Turning off the sink for the HS model is not implemented") +#      } +      # From p. 55 of the FOCUS kinetics report +#      nonlinear_term <- paste("ifelse(time <= tb, k1, k2)", "*", new_boxes[[1]]) +      nonlinear_term <- paste("((k1 - k2) / ( 1 + exp( 10*(time - tb) ) ) + k2)", "*", new_boxes[[1]]) +      spec[[varname]]$nlt<-nonlinear_term +      new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) +      new_parms <- c("k1", "k2", "tb") +      ff <- vector() +    }  + +    # Construct terms for transfer to sink and add if appropriate + +    #@@@@ REMOVE THIS ????? +    if(spec[[varname]]$sink) { +      # Add first-order sink term to first (or only) box for SFO and SFORB +#      if(spec[[varname]]$type %in% c("SFO", "SFORB")) { +      if(spec[[varname]]$type == "SFORB") { +        k_compound_sink <- paste("k", new_boxes[[1]], "sink", sep="_") +        sink_term <- paste("-", k_compound_sink, "*", new_boxes[[1]]) +        new_diffs[[1]] <- paste(new_diffs[[1]], sink_term) +        new_parms <- k_compound_sink +      } +    } +    +    # Add reversible binding if appropriate +    if(spec[[varname]]$type == "SFORB") { +      k_free_bound <- paste("k", varname, "free", "bound", sep="_")       +      k_bound_free <- paste("k", varname, "bound", "free", sep="_")       +      reversible_binding_terms <- c( +        paste("-", k_free_bound, "*", new_boxes[[1]], "+", k_bound_free, "*", new_boxes[[2]]), +        paste("+", k_free_bound, "*", new_boxes[[1]], "-", k_bound_free, "*", new_boxes[[2]])) +      new_diffs <- paste(new_diffs, reversible_binding_terms) +      new_parms <- c(new_parms, k_free_bound, k_bound_free) +    }  + +    # Add observed variable to model +    parms <- c(parms, new_parms) +    names(new_diffs) <- new_boxes +    diffs <- c(diffs, new_diffs) +  } +  # Transfer between compartments +  for (varname in obs_vars) { +    to <- spec[[varname]]$to +    if(!is.null(to)) { +      origin_box <- switch(spec[[varname]]$type, +        SFO = varname, +        FOMC = varname, +        DFOP = varname, +        HS = varname, +        SFORB = paste(varname, "free", sep="_")) +      fraction_left <- NULL +      for (target in to) { +        target_box <- switch(spec[[target]]$type, +          SFO = target, +          SFORB = paste(target, "free", sep="_")) +        # SFO is no longer special +        #if(spec[[varname]]$type %in% c("SFO", "SFORB")) { +        if(spec[[varname]]$type == "SFORB") { +          k_from_to <- paste("k", origin_box, target_box, sep="_") +          diffs[[origin_box]] <- paste(diffs[[origin_box]], "-",  +            k_from_to, "*", origin_box) +          diffs[[target_box]] <- paste(diffs[[target_box]], "+",  +            k_from_to, "*", origin_box) +          parms <- c(parms, k_from_to) +        } +        # Handle SFO like the others +#        if(spec[[varname]]$type %in% c("FOMC", "DFOP", "HS")) { +        if(spec[[varname]]$type %in% c("FOMC", "DFOP", "HS", "SFO")) { +          if ( length(to)==1 && !spec[[varname]]$sink ) { +            # There is only one output, so no need for any flow fractions. Just add the whole flow from the parent +            diffs[[target_box]] <- paste(diffs[[target_box]], "+", spec[[varname]]$nlt) +          } else { +            fraction_to_target = paste("f", varname, "to", target, sep="_") +            fraction_not_to_target = paste("(1 - ", fraction_to_target, ")", sep="") +            if(is.null(fraction_left)) { +              fraction_really_to_target = fraction_to_target +              fraction_left = fraction_not_to_target +            } else { +              # If this is the last output and there is no sink, it gets what's left +              if ( target==tail(to,1) && !spec[[varname]]$sink ) { +                 fraction_really_to_target = fraction_left +              } else { +# (1-fa)fb version +#                 fraction_really_to_target = paste(fraction_left, " * ", fraction_to_target, sep="") +#                 fraction_left = paste(fraction_left, " * ", fraction_not_to_target, sep="") +# fb version +                 fraction_really_to_target = fraction_to_target +                 fraction_left = paste(fraction_left, " - ", fraction_to_target, sep="") +              } +            } +            ff[target_box] = fraction_really_to_target +            diffs[[target_box]] <- paste(diffs[[target_box]], "+", ff[target_box], "*", spec[[varname]]$nlt) +            # Add the flow fraction parameter (if it exists) +            if ( target!=tail(to,1) || spec[[varname]]$sink ) { +              parms <- c(parms, fraction_to_target) +            } +          } +        } +      } +    } +  } +  model <- list(diffs = diffs, parms = parms, map = map) + +  # Create coefficient matrix if appropriate +  if (mat) { +    boxes <- names(diffs) +    n <- length(boxes) +    m <- matrix(nrow=n, ncol=n, dimnames=list(boxes, boxes)) +    for (from in boxes) { +      for (to in boxes) { +        if (from == to) { +          #@@@@ OMIT NEXT LINE? !!!!! +          k.candidate = paste("k", from, c(boxes, "sink"), sep="_") +          k.candidate = sub("free.*bound", "free_bound", k.candidate) +          k.candidate = sub("bound.*free", "bound_free", k.candidate) +          k.effective = intersect(model$parms, k.candidate) +          m[from,to] = ifelse(length(k.effective) > 0, +              paste("-", k.effective, collapse = " "), "0") +        } else { +          k.candidate = paste("k", from, to, sep="_") +          k.candidate = sub("free.*bound", "free_bound", k.candidate) +          k.candidate = sub("bound.*free", "bound_free", k.candidate) +          k.effective = intersect(model$parms, k.candidate) +          m[to, from] = ifelse(length(k.effective) > 0, +              k.effective, "0") +        } +      } +    } +    model$coefmat <- m +  } + +  if (exists("ff")) model$ff = ff +  class(model) <- "mkinmod" +  invisible(model) +} diff --git a/CakeNullPlot.R b/CakeNullPlot.R new file mode 100644 index 0000000..3eacd67 --- /dev/null +++ b/CakeNullPlot.R @@ -0,0 +1,127 @@ +#$Id: CakeNullPlot.R 216 2011-07-05 14:35:03Z nelr $ +# Generates model curves so the GUI can plot them. Used when all params are fixed so there was no fit +# The CAKE R modules are based on mkin +# Based on code in IRLSkinfit +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011  Syngenta +# Author: Rob Nelson (Tessella) +# Tessella Project Reference: 6245 + +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License as published by +#    the Free Software Foundation, either version 3 of the License, or +#    (at your option) any later version. +#  +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +#  +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. + +CakeNullPlot <- function(model, parms.ini, state.ini, observed, xlim = range(observed$time), digits = max(3, getOption("digits") - 3), ...) +{ +  # Print the fixed values +  fixed <- data.frame(value = c(state.ini, parms.ini)) +  fixed$type = c(rep("state", length(state.ini)), rep("deparm", length(parms.ini))) +  cat("\nFixed parameter values:\n") +  print(fixed) + +  # Print disappearence times +  obs_vars = unique(as.character(observed$name)) +  distimes <- data.frame(DT50 = rep(NA, length(obs_vars)),  +        DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) +  for (obs_var in obs_vars) { +      type = names(model$map[[obs_var]])[1] +      if(exists("DT50")) distimes[obs_var, ] = c(DT50, DT90) +      distimes[obs_var, ] = CakeDT(type,obs_var,parms.ini) +   } +   cat("\nEstimated disappearance times:\n") +   print(distimes, digits=digits,...) + +   # Plot the model + +  outtimes <- seq(0, xlim[2], length.out=101) + +  odeini <- state.ini +  odenames <- model$parms +  odeparms <- params.ini + +  # Solve the system +  evalparse <- function(string) +  { +    eval(parse(text=string), as.list(c(odeparms, odeini))) +  } +   +    mod_vars <- names(model$diffs) +    mkindiff <- function(t, state, parms) { +        time <- t +        diffs <- vector() +        for (box in mod_vars) { +            diffname <- paste("d", box, sep = "_") +            diffs[diffname] <- with(as.list(c(time, state, parms)),  +                eval(parse(text = model$diffs[[box]]))) +        } +        return(list(c(diffs))) +    } +   +   # Ensure initial state is at time 0 +   obstimes <- unique(c(0,observed$time)) +    out <- ode( +      y = odeini, +      times = obstimes, +      func = mkindiff,  +      parms = odeparms, +    ) +     +  # Output transformation for models with unobserved compartments like SFORB +  out_transformed <- data.frame(time = out[,"time"]) +  for (var in names(model$map)) { +    if(length(model$map[[var]]) == 1) { +      out_transformed[var] <- out[, var] +    } else { +      out_transformed[var] <- rowSums(out[, model$map[[var]]]) +    } +  }     +   +   predicted_long <- mkin_wide_to_long(out_transformed,time="time") +  # Calculate chi2 error levels according to FOCUS (2006) +   errmin<-CakeChi2(observed, predicted_long, obs_vars, c(), c()) +   cat("\nChi2 error levels in percent:\n") +   errmin$err.min <- 100 * errmin$err.min +   print(errmin, digits=digits,...) + +   +   # Fit to data +   data<-observed +   data$err<-rep(NA,length(data$time)) +   data<-merge(data, predicted_long, by=c("time","name")) +   names(data)<-c("time", "variable", "observed","err-var", "predicted") +   data$residual<-data$observed-data$predicted +   data<-data[order(data$variable,data$time),] +   cat("\nData:\n") +   print(format(data, digits = digits, scientific = FALSE,...), row.names = FALSE) +    +   +    out <- ode( +      y = odeini, +      times = outtimes, +      func = mkindiff,  +      parms = odeparms, +    ) +     +  # Output transformation for models with unobserved compartments like SFORB +  out_transformed <- data.frame(time = out[,"time"]) +  for (var in names(model$map)) { +    if(length(model$map[[var]]) == 1) { +      out_transformed[var] <- out[, var] +    } else { +      out_transformed[var] <- rowSums(out[, model$map[[var]]]) +    } +  }     +    +   cat("\n<PLOT MODEL START>\n") +   print(out_transformed) +   cat("<PLOT MODEL END>\n") +} + diff --git a/CakeOlsFit.R b/CakeOlsFit.R new file mode 100644 index 0000000..75ac471 --- /dev/null +++ b/CakeOlsFit.R @@ -0,0 +1,326 @@ +# Originally: mkinfit.R 87 2010-12-09 07:31:59Z jranke $ + +# Based on code in mkinfit +# Portions Johannes Ranke 2010 +# Contact: mkin-devel@lists.berlios.de +# The summary function is an adapted and extended version of summary.modFit +# from the FME package, v 1.1 by Soetart and Petzoldt, which was in turn +# inspired by summary.nls.lm + +#$Id: CakeOlsFit.R 216 2011-07-05 14:35:03Z nelr $ +# This version has been modified to expect SFO parameterised as k and flow fractions +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011  Syngenta +# Authors: Rob Nelson, Richard Smith +# Tessella Project Reference: 6245 + +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License as published by +#    the Free Software Foundation, either version 3 of the License, or +#    (at your option) any later version. +#  +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +#  +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. + +CakeOlsFit <- function(mkinmod, observed, +  parms.ini = rep(0.1, length(mkinmod$parms)), +  state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)),  +  lower = 0, upper = Inf, +  fixed_parms = NULL, +  fixed_initials = names(mkinmod$diffs)[-1], +  eigen = TRUE, +  plot = FALSE, quiet = FALSE, +  err = NULL, weight = "none", scaleVar = FALSE, +  atol = 1e-6, +  ...) +{ +  mod_vars <- names(mkinmod$diffs) +  # Subset dataframe with mapped (modelled) variables +  observed <- subset(observed, name %in% names(mkinmod$map)) +  # Get names of observed variables +  obs_vars = unique(as.character(observed$name)) + +  # Name the parameters if they are not named yet +  if(is.null(names(parms.ini))) names(parms.ini) <- mkinmod$parms + +  # Name the inital parameter values if they are not named yet +  if(is.null(names(state.ini))) names(state.ini) <- mod_vars + +  # Parameters to be optimised +  parms.fixed <- parms.ini[fixed_parms] +  optim_parms <- setdiff(names(parms.ini), fixed_parms) +  parms.optim <- parms.ini[optim_parms] + +  state.ini.fixed <- state.ini[fixed_initials] +  optim_initials <- setdiff(names(state.ini), fixed_initials) +  state.ini.optim <- state.ini[optim_initials] +  state.ini.optim.boxnames <- names(state.ini.optim) +  if(length(state.ini.optim) > 0) { +      names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") +  } + +  # Decide if the solution of the model can be based on a simple analytical +  # formula, the spectral decomposition of the matrix (fundamental system) +  # or a numeric ode solver from the deSolve package +  if (length(mkinmod$map) == 1) { +    solution = "analytical" +  } else { +    if (is.matrix(mkinmod$coefmat) & eigen) solution = "eigen" +    else solution = "deSolve" +  } + +  # Create a function calculating the differentials specified by the model +  # if necessary +  if(solution == "deSolve") { +    mkindiff <- function(t, state, parms) { +      time <- t +      diffs <- vector() +      for (box in mod_vars) +      { +        diffname <- paste("d", box, sep="_")       +        diffs[diffname] <- with(as.list(c(time,state, parms)), +          eval(parse(text=mkinmod$diffs[[box]]))) +      } +      return(list(c(diffs))) +    }  +  } + +  cost.old <- 1e100 +  calls <- 0 +  out_predicted <- NA +  # Define the model cost function +  cost <- function(P) +  { +    assign("calls", calls+1, inherits=TRUE) +    if(length(state.ini.optim) > 0) { +      odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) +      names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) +    } else odeini <- state.ini.fixed + +    odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed) + +    # Ensure initial state is at time 0 +    outtimes = unique(c(0,observed$time)) +    evalparse <- function(string) +    { +      eval(parse(text=string), as.list(c(odeparms, odeini))) +    } + +    # Solve the system +    if (solution == "analytical") { +      parent.type = names(mkinmod$map[[1]])[1]   +      parent.name = names(mkinmod$diffs)[[1]] +      o <- switch(parent.type, +        SFO = SFO.solution(outtimes,  +            evalparse(parent.name), +            evalparse(paste("k", parent.name, sep="_"))), +#            evalparse("k")), +#            evalparse(paste("k", parent.name, "sink", sep="_"))), +        FOMC = FOMC.solution(outtimes, +            evalparse(parent.name), +            evalparse("alpha"), evalparse("beta")), +        DFOP = DFOP.solution(outtimes, +            evalparse(parent.name), +            evalparse("k1"), evalparse("k2"), +            evalparse("g")), +        HS = HS.solution(outtimes, +            evalparse(parent.name), +            evalparse("k1"), evalparse("k2"), +            evalparse("tb")), +        SFORB = SFORB.solution(outtimes, +            evalparse(parent.name), +            evalparse(paste("k", parent.name, "bound", sep="_")), +            evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), +            evalparse(paste("k", parent.name, "sink", sep="_"))) +      ) +      out <- cbind(outtimes, o) +      dimnames(out) <- list(outtimes, c("time", sub("_free", "", parent.name))) +    } +    if (solution == "eigen") { +      coefmat.num <- matrix(sapply(as.vector(mkinmod$coefmat), evalparse),  +        nrow = length(mod_vars)) +      e <- eigen(coefmat.num) +      c <- solve(e$vectors, odeini) +      f.out <- function(t) { +        e$vectors %*% diag(exp(e$values * t), nrow=length(mod_vars)) %*% c +      } +      o <- matrix(mapply(f.out, outtimes),  +        nrow = length(mod_vars), ncol = length(outtimes)) +      dimnames(o) <- list(mod_vars, outtimes) +      out <- cbind(time = outtimes, t(o)) +    }  +    if (solution == "deSolve")   +    { +      out <- ode( +        y = odeini, +        times = outtimes, +        func = mkindiff,  +        parms = odeparms, +        atol = atol +      ) +    } +   +    # Output transformation for models with unobserved compartments like SFORB +    out_transformed <- data.frame(time = out[,"time"]) +    for (var in names(mkinmod$map)) { +      if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { +        out_transformed[var] <- out[, var] +      } else { +        out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) +      } +    }     +    assign("out_predicted", out_transformed, inherits=TRUE) + +    mC <- CakeCost(out_transformed, observed, y = "value", +      err = err, weight = weight, scaleVar = scaleVar) +	mC$penalties <- CakePenalties(odeparms, out_transformed, observed) +	mC$model <- mC$cost + mC$penalties; +	if (mC$model < cost.old) { +		if (!quiet)  +			cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, +			  "\n") + +      # Plot the data and current model output if requested +      if(plot) { +        outtimes_plot = seq(min(observed$time), max(observed$time), length.out=100) +        if (solution == "analytical") { +          o_plot <- switch(parent.type, +            SFO = SFO.solution(outtimes_plot,  +                evalparse(parent.name), +                evalparse(paste("k", parent.name, sep="_"))), +#                evalparse(paste("k", parent.name, "sink", sep="_"))), +            FOMC = FOMC.solution(outtimes_plot, +                evalparse(parent.name), +                evalparse("alpha"), evalparse("beta")), +            DFOP = DFOP.solution(outtimes_plot, +                evalparse(parent.name), +                evalparse("k1"), evalparse("k2"), +                evalparse("g")), +            HS = HS.solution(outtimes_plot, +                evalparse(parent.name), +                evalparse("k1"), evalparse("k2"), +                evalparse("tb")), +            SFORB = SFORB.solution(outtimes_plot, +                evalparse(parent.name), +                evalparse(paste("k", parent.name, "bound", sep="_")), +                evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), +                evalparse(paste("k", parent.name, "sink", sep="_"))) +          ) +          out_plot <- cbind(outtimes_plot, o_plot) +          dimnames(out_plot) <- list(outtimes_plot, c("time", sub("_free", "", parent.name))) +        } +        if(solution == "eigen") { +          o_plot <- matrix(mapply(f.out, outtimes_plot),  +            nrow = length(mod_vars), ncol = length(outtimes_plot)) +          dimnames(o_plot) <- list(mod_vars, outtimes_plot) +          out_plot <- cbind(time = outtimes_plot, t(o_plot)) +        }  +        if (solution == "deSolve") { +          out_plot <- ode( +            y = odeini, +            times = outtimes_plot, +            func = mkindiff,  +            parms = odeparms) +        } +        out_transformed_plot <- data.frame(time = out_plot[,"time"]) +        for (var in names(mkinmod$map)) { +          if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { +            out_transformed_plot[var] <- out_plot[, var] +          } else { +            out_transformed_plot[var] <- rowSums(out_plot[, mkinmod$map[[var]]]) +          } +        }     +        out_transformed_plot <<- out_transformed_plot + +        plot(0, type="n",  +          xlim = range(observed$time), ylim = range(observed$value, na.rm=TRUE), +          xlab = "Time", ylab = "Observed") +        col_obs <- pch_obs <- 1:length(obs_vars) +        names(col_obs) <- names(pch_obs) <- obs_vars +        for (obs_var in obs_vars) { +          points(subset(observed, name == obs_var, c(time, value)),  +            pch = pch_obs[obs_var], col = col_obs[obs_var]) +        } +        matlines(out_transformed_plot$time, out_transformed_plot[-1]) +        legend("topright", inset=c(0.05, 0.05), legend=obs_vars,  +          col=col_obs, pch=pch_obs, lty=1:length(pch_obs)) +      } +     +      assign("cost.old", mC$model, inherits=TRUE) +    } +	# HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost +	mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res) +	 +    return(mC) +  } +  fit <-modFit(cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, ...) + +  # We need to return some more data for summary and plotting +  fit$solution <- solution +  if (solution == "eigen") { +    fit$coefmat <- mkinmod$coefmat +  }  +  if (solution == "deSolve") { +    fit$mkindiff <- mkindiff +  } +  if (plot == TRUE) { +    fit$out_transformed_plot = out_transformed_plot +  } + +  # We also need various other information for summary and plotting +  fit$map <- mkinmod$map +  fit$diffs <- mkinmod$diffs +   +    # mkin_long_to_wide does not handle ragged data +#    fit$observed <- mkin_long_to_wide(observed) +    fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") +    names(fit$observed) <- c("time", as.vector(unique(observed$name))) +   +  predicted_long <- mkin_wide_to_long(out_predicted, time = "time") +  fit$predicted <- out_predicted + +  # Collect initial parameter values in two dataframes +  fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) +  fit$start$type = c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim))) +  fit$start$lower <- lower +  fit$start$upper <- upper + +  fit$fixed <- data.frame( +    value = c(state.ini.fixed, parms.fixed)) +  fit$fixed$type = c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed))) + +#  # Calculate chi2 error levels according to FOCUS (2006) +  fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) + +  # Calculate dissipation times DT50 and DT90 and formation fractions +  parms.all = c(fit$par, parms.fixed) +  fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)),  +    row.names = obs_vars) +  fit$ff <- vector() +  fit$SFORB <- vector() +  for (obs_var in obs_vars) { +    type = names(mkinmod$map[[obs_var]])[1]   + +    fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all) +  } + +  fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed) +   +  # Collect observed, predicted and residuals +  data<-observed +  data$err<-rep(NA,length(data$time)) +  data <- merge(data, predicted_long, by = c("time", "name")) +  names(data)<-c("time", "variable", "observed","err-var", "predicted") +  data$residual <- data$observed - data$predicted +  data$variable <- ordered(data$variable, levels = obs_vars) +  fit$data <- data[order(data$variable, data$time), ] +  fit$atol <- atol + +  class(fit) <- c("CakeFit", "mkinfit", "modFit")  +  return(fit) +} + diff --git a/CakeOlsPlot.R b/CakeOlsPlot.R new file mode 100644 index 0000000..199cc28 --- /dev/null +++ b/CakeOlsPlot.R @@ -0,0 +1,117 @@ +#$Id: CakeOlsPlot.R 216 2011-07-05 14:35:03Z nelr $ +# Generates fitted curves so the GUI can plot them +# Based on code in IRLSkinfit +# Author: Rob Nelson (Tessella) +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011  Syngenta +# Tessella Project Reference: 6245 +# +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License as published by +#    the Free Software Foundation, either version 3 of the License, or +#    (at your option) any later version. +#  +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +#  +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. + +CakeOlsPlot <- function(fit, xlim = range(fit$data$time), ...) +{ +   cat("<PLOT MODEL START>\n") + +   solution = fit$solution +   if ( is.null(solution) ) { +      solution <- "deSolve" +   } +   atol = fit$atol +   if ( is.null(atol) ) { +      atol = 1.0e-6 +   } +    +  fixed <- fit$fixed$value +  names(fixed) <- rownames(fit$fixed) +  parms.all <- c(fit$par, fixed) +  ininames <- c( +    rownames(subset(fit$start, type == "state")), +    rownames(subset(fit$fixed, type == "state"))) +  odeini <- parms.all[ininames] +  names(odeini) <- names(fit$diffs) + +  outtimes <- seq(0, xlim[2], length.out=101) + +  odenames <- c( +    rownames(subset(fit$start, type == "deparm")), +    rownames(subset(fit$fixed, type == "deparm"))) +  odeparms <- parms.all[odenames] + +  # Solve the system +  evalparse <- function(string) +  { +    eval(parse(text=string), as.list(c(odeparms, odeini))) +  } +  if (solution == "analytical") { +    parent.type = names(fit$map[[1]])[1]   +    parent.name = names(fit$diffs)[[1]] +    o <- switch(parent.type, +      SFO = SFO.solution(outtimes,  +          evalparse(parent.name), +          evalparse(paste("k", parent.name, sep="_"))), +      FOMC = FOMC.solution(outtimes, +          evalparse(parent.name), +          evalparse("alpha"), evalparse("beta")), +      DFOP = DFOP.solution(outtimes, +          evalparse(parent.name), +          evalparse("k1"), evalparse("k2"), +          evalparse("g")), +      HS = HS.solution(outtimes, +          evalparse(parent.name), +          evalparse("k1"), evalparse("k2"), +          evalparse("tb")), +      SFORB = SFORB.solution(outtimes, +          evalparse(parent.name), +          evalparse(paste("k", parent.name, "free_bound", sep="_")), +          evalparse(paste("k", parent.name, "bound_free", sep="_")), +          evalparse(paste("k", parent.name, "free_sink", sep="_"))) +    ) +    out <- cbind(outtimes, o) +    dimnames(out) <- list(outtimes, c("time", parent.name)) +  } +  if (solution == "eigen") { +    coefmat.num <- matrix(sapply(as.vector(fit$coefmat), evalparse),  +      nrow = length(odeini)) +    e <- eigen(coefmat.num) +    c <- solve(e$vectors, odeini) +    f.out <- function(t) { +      e$vectors %*% diag(exp(e$values * t), nrow=length(odeini)) %*% c +    } +    o <- matrix(mapply(f.out, outtimes),  +      nrow = length(odeini), ncol = length(outtimes)) +    dimnames(o) <- list(names(odeini), NULL) +    out <- cbind(time = outtimes, t(o)) +  }  +  if (solution == "deSolve") { +    out <- ode( +      y = odeini, +      times = outtimes, +      func = fit$mkindiff,  +      parms = odeparms, +      atol = atol +    ) +  } +     +  # Output transformation for models with unobserved compartments like SFORB +  out_transformed <- data.frame(time = out[,"time"]) +  for (var in names(fit$map)) { +    if(length(fit$map[[var]]) == 1) { +      out_transformed[var] <- out[, var] +    } else { +      out_transformed[var] <- rowSums(out[, fit$map[[var]]]) +    } +  } +  print(out_transformed) + +  cat("<PLOT MODEL END>\n") +} diff --git a/CakePenalties.R b/CakePenalties.R new file mode 100644 index 0000000..3b2a0df --- /dev/null +++ b/CakePenalties.R @@ -0,0 +1,56 @@ +# $Id: CakePenalties.R 216 2011-07-05 14:35:03Z nelr $ +# The CAKE R modules are based on mkin +# CAKE (6245), by Tessella, for Syngenta: Copyright (C) 2011  Syngenta +# +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License as published by +#    the Free Software Foundation, either version 3 of the License, or +#    (at your option) any later version. +#  +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +#  +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. + +# Penalty function for fits that should not be accepted +CakePenalties<-function(params, modelled, obs, penalty.scale = 10, ...){ +	# Because the model cost is roughly proportional to points, +	# make the penalties scale that way too for consistent behaviour +	# with many or few data points +	sum(CakePenaltiesLong(params, modelled, obs, penalty.scale, ...)$value) * dim(obs)[[1]] +} +CakePenaltiesLong<-function(params, modelled, obs, penalty.scale = 10, ...){	 +	r<-data.frame(name=character(0), value=numeric(0)) +	 +	# Flow fractions > 1 +	#------------------------------- +	fails<-CakePenaltiesFF(params); +	if(dim(fails)[[1]] > 0){ +		#print("Penalty failures:") +		#print(fails) +		# Add a failure record for each flow fraction that missed +		names(fails)<-c('name', 'value') +		fails$value = 100*(fails$value - 1) +		fails$name = lapply(fails$name, function(x) paste("FF_", x, sep="")) +		r <- rbind(r, fails) +		#totalPenalties <- totalPenalties + 100*sum(fails$x - 1)	# Penalty for failure +	}	 + +	r$value <- r$value * penalty.scale +	return(r) +} + +CakePenaltiesFF<-function(params){ +	ff.values<-params["f_"==substr(names(params), 0, 2)]	# Flow fractions +	if(length(ff.values) > 0){ +		ffs<-data.frame(t(sapply(strsplit(names(ff.values), '_'), FUN=function(x){ x[c(2,4)] })))	# Split into source/dest +		names(ffs)<-c('source', 'dest') +		ffs['value'] <- ff.values	# Add values +		sums<-aggregate(ffs$value, list(s=ffs$source), sum)	# Sum of flows from each source +		fails<-sums[sums$x>1.00001,]	# All compartments > 1 +	} else { fails<-data.frame(s=character(0), x=numeric(0)) } +	return(fails) +}
\ No newline at end of file diff --git a/CakeSummary.R b/CakeSummary.R new file mode 100644 index 0000000..ec42e75 --- /dev/null +++ b/CakeSummary.R @@ -0,0 +1,301 @@ +# $Id: CakeSummary.R 216 2011-07-05 14:35:03Z nelr $ +# CakeSummary: Functions to calculate statistics used in the summary, +# and display the summary itself. + +# Parts modified from package mkin +# Parts Tessella (Rob Nelson/Richard Smith) for Syngenta: Copyright (C) 2011  Syngenta +# Tessella Project Reference: 6245 + +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License as published by +#    the Free Software Foundation, either version 3 of the License, or +#    (at your option) any later version. +#  +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +#  +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. + +# Calculate the decay times for a compartment +# Arguments +#  type - type of compartment +#  obs_var - compartment name +#  parms.all - list of parameters +CakeDT<-function(type, obs_var, parms.all) { +    if (type == "SFO") { +      # Changed to new parameterisation +      #k_names = grep(paste("k", obs_var, sep="_"), names(parms.all), value=TRUE) +      k_name = paste("k", obs_var, sep="_") +      k_tot = parms.all[k_name] +      DT50 = log(2)/k_tot +      DT90 = log(10)/k_tot +    } +    if (type == "FOMC") { +      alpha = parms.all["alpha"] +      beta = parms.all["beta"] +      DT50 = beta * (2^(1/alpha) - 1) +      DT90 = beta * (10^(1/alpha) - 1) +    } +    if (type == "DFOP") { +      k1 = parms.all["k1"] +      k2 = parms.all["k2"] +      g = parms.all["g"] +      f <- function(t, x) { +        fraction <- g * exp( - k1 * t) + (1 - g) * exp( - k2 * t) +        (fraction - (1 - x/100))^2 +      } +      DTmax <- 1000 +      DT50.o <- optimize(f, c(0.001, DTmax), x=50)$minimum +      DT50 = ifelse(DTmax - DT50.o < 0.1, NA, DT50.o) +      DT90.o <- optimize(f, c(0.001, DTmax), x=90)$minimum +      DT90 = ifelse(DTmax - DT90.o < 0.1, NA, DT90.o) +    } +    if (type == "HS") { +      k1 = parms.all["k1"] +      k2 = parms.all["k2"] +      tb = parms.all["tb"] +      DTx <- function(x) { +        DTx.a <- (log(100/(100 - x)))/k1 +        DTx.b <- tb + (log(100/(100 - x)) - k1 * tb)/k2 +        if (DTx.a < tb) DTx <- DTx.a +        else DTx <- DTx.b +        return(DTx) +      } +      DT50 <- DTx(50) +      DT90 <- DTx(90) +    } +    if (type == "SFORB") { +      # FOCUS kinetics (2006), p. 60 f +      k_out_names = grep(paste("k", obs_var, "free", sep="_"), names(parms.all), value=TRUE) +      k_out_names = setdiff(k_out_names, paste("k", obs_var, "free", "bound", sep="_")) +      k_1output = sum(parms.all[k_out_names]) +      k_12 = parms.all[paste("k", obs_var, "free", "bound", sep="_")] +      k_21 = parms.all[paste("k", obs_var, "bound", "free", sep="_")] + +      sqrt_exp = sqrt(1/4 * (k_12 + k_21 + k_1output)^2 + k_12 * k_21 - (k_12 + k_1output) * k_21) +      b1 = 0.5 * (k_12 + k_21 + k_1output) + sqrt_exp +      b2 = 0.5 * (k_12 + k_21 + k_1output) - sqrt_exp + +      SFORB_fraction = function(t) { +        ((k_12 + k_21 - b1)/(b2 - b1)) * exp(-b1 * t) + +        ((k_12 + k_21 - b2)/(b1 - b2)) * exp(-b2 * t) +      } +      f_50 <- function(t) (SFORB_fraction(t) - 0.5)^2 +      max_DT <- 1000 +      DT50.o <- optimize(f_50, c(0.01, max_DT))$minimum +      if (abs(DT50.o - max_DT) < 0.01) DT50 = NA else DT50 = DT50.o +      f_90 <- function(t) (SFORB_fraction(t) - 0.1)^2 +      DT90.o <- optimize(f_90, c(0.01, 1000))$minimum +      if (abs(DT90.o - max_DT) < 0.01) DT90 = NA else DT90 = DT90.o +    } +    ret<-c(DT50, DT90) +    names(ret)<-c("DT50","DT90") +    ret +} + +# Compute chi2 etc +# Arguments +#  observed - data.frame of observed data +#  predicted_long - fitted values etc +#  obs_vars - compartment names +#  parms.optim - list of fitted parameters +#  state,ini.optim - list of fitted initial values +# +CakeChi2<-function(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) { +  # Calculate chi2 error levels according to FOCUS (2006) +  means <- aggregate(value ~ time + name, data = observed, mean, na.rm=TRUE) +  errdata <- merge(means, predicted_long, by = c("time", "name"), suffixes = c("_mean", "_pred")) +  errdata <- errdata[order(errdata$time, errdata$name), ] +  errmin.overall <- mkinerrmin(errdata, length(parms.optim) + length(state.ini.optim)) + +  errmin <- data.frame(err.min = errmin.overall$err.min,  +    n.optim = errmin.overall$n.optim, df = errmin.overall$df) +  rownames(errmin) <- "All data" +  for (obs_var in obs_vars) +  { +    errdata.var <- subset(errdata, name == obs_var) +    n.k.optim <- length(grep(paste("k", obs_var, sep="_"), names(parms.optim))) +    n.initials.optim <- length(grep(paste(obs_var, ".*", "_0", sep=""), names(state.ini.optim))) +    n.optim <- n.k.optim + n.initials.optim +    if ("alpha" %in% names(parms.optim)) n.optim <- n.optim + 1 +    if ("beta" %in% names(parms.optim)) n.optim <- n.optim + 1 +    if ("k1" %in% names(parms.optim)) n.optim <- n.optim + 1 +    if ("k2" %in% names(parms.optim)) n.optim <- n.optim + 1 +    if ("g" %in% names(parms.optim)) n.optim <- n.optim + 1 +    if ("tb" %in% names(parms.optim)) n.optim <- n.optim + 1 +    errmin.tmp <- mkinerrmin(errdata.var, n.optim) +    errmin[obs_var, c("err.min", "n.optim", "df")] <- errmin.tmp +  } +  errmin +} + +# Compute additional statistics (rē/efficiency) of obs v pred for each compartment and the whole problem +CakeAdditionalStats<-function(obsvspred){ +	agg<-aggregate(obsvspred[c('observed', 'err-var', 'predicted', 'residual')], list(name=obsvspred$variable), function(x){x}, simplify=FALSE) +	frames<-apply(agg, 1, as.data.frame); +	names(frames)<-agg$name; +	frames$all<-obsvspred +	 +	t(sapply(frames, function(frame){ +		# This function takes a data frame for a single variable and makes the stats for it +		# rē: FOCUS eq 6.5 p. 97 +		# Efficiency: eq 6.6 p. 99 +		do <- frame$observed - mean(frame$observed) +		dp <- frame$predicted - mean(frame$predicted) +		r2 <- ( sum(do * dp) / sqrt(sum(do^2) * sum(dp^2)) ) ^ 2 +		eff <- 1 - ( sum((frame$predicted - frame$observed)^2) / sum(do^2) ) +		list(r2=r2, eff=eff) +	})) +} + +# Summarise a fit - used for CakeIrlsFit and CakeOlsFit +summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, ff = TRUE, cov = FALSE,...) { +  param  <- object$par +  pnames <- names(param) +  p      <- length(param) +  covar  <- try(solve(0.5*object$hessian), silent = TRUE)   # unscaled covariance + +  rdf    <- object$df.residual +  resvar <- object$ssr / rdf +  modVariance <- object$ssr / length(object$residuals) +   +  if (!is.numeric(covar)) { +#    message <- "Cannot estimate covariance; system is singular" +#    warning(message) +#    covar <- matrix(data = NA, nrow = p, ncol = p) +   covar=NULL +  } else { +    message <- "ok" +    rownames(covar) <- colnames(covar) <-pnames +    se     <- sqrt(diag(covar) * resvar) +    names(se) <- pnames +    tval      <- param / se +  } + +  if (!all(object$start$lower >=0)) { +    message <- "Note that the one-sided t-test may not be appropriate if +      parameter values below zero are possible." +    warning(message) +  } else message <- "ok" + +  if(cov && !is.null(covar)) { +    alpha <- 0.05 +    lci <- param + qt(alpha/2,rdf)*se +    uci <- param + qt(1-alpha/2,rdf)*se +    param <- cbind(param, se, tval, pt(tval, rdf, lower.tail = FALSE), lci, uci) +    dimnames(param) <- list(pnames, c("Estimate", "Std. Error", +                                    "t value", "Pr(>t)", "Lower CI", "Upper CI")) +    ans <- list(residuals = object$residuals, +                residualVariance = resvar, +                sigma = sqrt(resvar), +                modVariance = modVariance, +                df = c(p, rdf), cov.unscaled = covar, +                cov.scaled = covar * resvar, +                info = object$info, niter = object$iterations, +                stopmess = message, +                par = param) +  } else { +    param <- cbind(param)   +    ans <- list(residuals = object$residuals, +                residualVariance = resvar, +                sigma = sqrt(resvar), +                modVariance = modVariance, +                df = c(p, rdf), +                info = object$info, niter = object$iterations, +                stopmess = message, +                par = param) +    } + +  ans$diffs <- object$diffs +  if(data) { +	ans$data <- object$data +	ans$additionalstats = CakeAdditionalStats(object$data) +  } +  ans$start <- object$start +  ans$fixed <- object$fixed +  ans$errmin <- object$errmin  +  ans$penalties <- object$penalties +  if(distimes) ans$distimes <- object$distimes +  if(ff) ans$ff <- object$ff +  if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB +  class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit")  +  return(ans)   +} + +# Print a summary. Used for CakeIrlsFit, CakeOlsFit and CakeMcmcFit +# Expanded from print.summary.modFit +print.summary.CakeFit <- function(x, digits = max(3, getOption("digits") - 3), ...) { +  cat("\nEquations:\n") +  print(noquote(as.character(x[["diffs"]]))) +  df  <- x$df +  rdf <- df[2] + +  cat("\nStarting values for optimised parameters:\n") +  print(x$start) + +  cat("\nFixed parameter values:\n") +  if(length(x$fixed$value) == 0) cat("None\n") +  else print(x$fixed) +   +  if ( !is.null(x$par) ) { +    cat("\nOptimised parameters:\n") +    printCoefmat(x$par, digits = digits, ...) +  } + +  cat("\nResidual standard error:", +      format(signif(x$sigma, digits)), "on", rdf, "degrees of freedom\n") + +  cat("\nChi2 error levels in percent:\n") +  x$errmin$err.min <- 100 * x$errmin$err.min +  print(x$errmin, digits=digits,...) + +  printdistimes <- !is.null(x$distimes) +  if(printdistimes){ +    cat("\nEstimated disappearance times:\n") +    print(x$distimes, digits=digits,...) +  }     + +  printff <- !is.null(x$ff) +  if(printff){ +    cat("\nEstimated formation fractions:\n") +    print(data.frame(ff = x$ff), digits=digits,...) +  }     + +  printSFORB <- !is.null(x$SFORB) +  if(printSFORB){ +    cat("\nEstimated Eigenvalues of SFORB model(s):\n") +    print(x$SFORB, digits=digits,...) +  }     + +  printcor <- !is.null(x$cov.unscaled) +  if (printcor){ +    Corr <- cov2cor(x$cov.unscaled) +    rownames(Corr) <- colnames(Corr) <- rownames(x$par) +    cat("\nParameter correlation:\n") +    print(Corr, digits = digits, ...) +  } + +  printdata <- !is.null(x$data) +  if (printdata){ +    cat("\nData:\n") +    print(format(x$data, digits = digits, scientific = FALSE,...), row.names = FALSE) +  } +   +  if(!is.null(x$additionalstats)){ +	cat("\nAdditional Stats:\n"); +	print(x$additionalstats, digits=digits, ...) +  } +   +  if(!(is.null(x$penalties) || 0 == dim(x$penalties)[[1]])){ +	cat("\nPenalties:\n"); +	print(x$penalties, digits=digits, row.names = FALSE, ...) +  } +   +  if ( !is.null(x$seed) ) { +      cat("\nRandom Seed:", x$seed, "\n") +  } +  invisible(x) +}  | 
