summaryrefslogtreecommitdiff
path: root/CakeCost.R
diff options
context:
space:
mode:
Diffstat (limited to 'CakeCost.R')
-rw-r--r--CakeCost.R348
1 files changed, 348 insertions, 0 deletions
diff --git a/CakeCost.R b/CakeCost.R
new file mode 100644
index 0000000..1cc3981
--- /dev/null
+++ b/CakeCost.R
@@ -0,0 +1,348 @@
+## -----------------------------------------------------------------------------
+## The model cost and residuals
+## -----------------------------------------------------------------------------
+# The CAKE R modules are based on mkin,
+# Call to approx is only performed if there are multiple non NA values
+# which should prevent most of the crashes - Rob Nelson (Tessella)
+#
+# Modifications developed by Tessella Plc for Syngenta, Copyright (C) 2011 Syngenta
+# Authors: Rob Nelson, Richard Smith
+# Tessella Project Reference: 6245
+#
+# This program is free software: you can
+# redistribute them and/or modify them under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>#
+#
+# $Id: CakeCost.R 216 2011-07-05 14:35:03Z nelr $
+
+CakeCost <- function (model, obs, x = "time", y = NULL, err = NULL,
+ weight = "none", scaleVar = FALSE, cost = NULL, ...) {
+
+ ## convert vector to matrix
+ if (is.vector(obs)) {
+ cn <- names(obs)
+ obs <- matrix(data = obs, nrow = 1)
+ colnames(obs) <- cn
+ }
+ if (is.vector(model)) {
+ cn <- names(model)
+ model <- matrix(data=model, nrow = 1)
+ colnames(model) <- cn
+ }
+
+ ##=============================================================================
+ ## Observations
+ ##=============================================================================
+
+ ## The position of independent variable(s)
+ ix <- 0
+ if (! is.null(x)) { # mapping required...
+ ## For now multiple independent variables are not supported...
+ if (length(x) > 1)
+ stop ("multiple independent variables in 'obs' are not yet supported")
+
+ if (! is.character(x))
+ stop ("'x' should be the *name* of the column with the independent variable in 'obs' or NULL")
+ ix <- which(colnames(obs) %in% x)
+ if (length(ix) != length(x))
+ stop(paste("Independent variable column not found in observations", x))
+ } else ix <- NULL
+
+ ## The position of weighing values
+ ierr <- 0
+ if (! is.null(err)) {
+ if (! is.character(err))
+ stop ("'err' should be the *name* of the column with the error estimates in obs or NULL")
+ ierr <- which(colnames(obs) == err) # only one
+ if (length(ierr) == 0)
+ stop(paste("Column with error estimates not found in observations", err))
+ }
+
+ ## The dependent variables
+ type <- 1 # data input type: type 2 is table format, type 1 is long format...
+
+ if (!is.null(y)) { # it is in table format; first column are names of observed data...
+
+ Names <- as.character(unique(obs[, 1])) # Names of data sets, all data should be model variables...
+ Ndat <- length(Names) # Number of data sets
+ ilist <- 1:Ndat
+ if (! is.character(y))
+ stop ("'y' should be the *name* of the column with the values of the dependent variable in obs")
+ iy <- which(colnames(obs) == y)
+ if (length(iy) == 0)
+ stop(paste("Column with value of dependent variable not found in observations", y))
+ type <- 2
+
+ } else { # it is a matrix, variable names are column names
+ Ndat <- NCOL(obs)-1
+ Names <- colnames(obs)
+ ilist <- (1:NCOL(obs)) # column positions of the (dependent) observed variables
+ exclude <- ix # exclude columns that are not
+ if (ierr > 0)
+ exclude <- c(ix, ierr) # exclude columns that are not
+ if (length(exclude) > 0)
+ ilist <- ilist[-exclude]
+ }
+
+ #================================
+ # The model results
+ #================================
+
+ ModNames <- colnames(model) # Names of model variables
+ if (length(ix) > 1) {
+ ixMod <- NULL
+
+ for ( i in 1:length(ix)) {
+ ix2 <- which(colnames(model) == x[i])
+ if (length(ix2) == 0)
+ stop(paste("Cannot calculate cost: independent variable not found in model output", x[i]))
+ ixMod <- c(ixMod, ix2)
+ }
+
+ xMod <- model[,ixMod] # Independent variable, model
+ } else if (length(ix) == 1) {
+ ixMod <- which(colnames(model) == x)
+ if (length(ixMod) == 0)
+ stop(paste("Cannot calculate cost: independent variable not found in model output", x))
+ xMod <- model[,ixMod] # Independent variable, model
+ }
+ Residual <- NULL
+ CostVar <- NULL
+
+ #================================
+ # Compare model and data...
+ #================================
+ xDat <- 0
+ iDat <- 1:nrow(obs)
+
+ for (i in ilist) { # for each observed variable ...
+ ii <- which(ModNames == Names[i])
+ if (length(ii) == 0) stop(paste("observed variable not found in model output", Names[i]))
+ yMod <- model[,ii]
+ if (type == 2) { # table format
+ iDat <- which (obs[,1] == Names[i])
+ if (length(ix) > 0) xDat <- obs[iDat, ix]
+ obsdat <- obs[iDat, iy]
+ } else {
+ if (length(ix) > 0) xDat <- obs[, 1]
+ obsdat <- obs[,i]
+ }
+ ii <- which(is.na(obsdat))
+ if (length(ii) > 0) {
+ xDat <- xDat[-ii]
+ obsdat <- obsdat[-ii]
+ }
+
+ # CAKE version - Added tests for multiple non-NA values
+ if (length(ix) > 0 && length(unique(xMod[!is.na(xMod)]))>1 && length(yMod[!is.na(yMod)])>1)
+ {
+ ModVar <- approx(xMod, yMod, xout = xDat)$y
+ }
+ else {
+ cat("CakeCost Warning: Only one valid point - using mean (yMod was", yMod, ")\n")
+ ModVar <- mean(yMod[!is.na(yMod)])
+ obsdat <- mean(obsdat)
+ }
+ iex <- which(!is.na(ModVar))
+ ModVar <- ModVar[iex]
+ obsdat <- obsdat[iex]
+ xDat <- xDat[iex]
+ if (ierr > 0) {
+ Err <- obs[iDat, ierr]
+ Err <- Err[iex]
+ } else {
+ if (weight == "std")
+ Err <- sd(obsdat)
+ else if (weight == "mean")
+ Err <- mean(abs(obsdat))
+ else if (weight == "none")
+ Err <- 1
+ else
+ stop("error: do not recognize 'weight'; should be one of 'none', 'std', 'mean'")
+ }
+ if (any(is.na(Err)))
+ stop(paste("error: cannot estimate weighing for observed variable: ", Names[i]))
+ if (min(Err) <= 0)
+ stop(paste("error: weighing for observed variable is 0 or negative:", Names[i]))
+ if (scaleVar)
+ Scale <- 1/length(obsdat)
+ else Scale <- 1
+ Res <- (ModVar- obsdat)
+ res <- Res/Err
+ resScaled <- res*Scale
+ Residual <- rbind(Residual,
+ data.frame(
+ name = Names[i],
+ x = xDat,
+ obs = obsdat,
+ mod = ModVar,
+ weight = 1/Err,
+ res.unweighted = Res,
+ res = res))
+
+ CostVar <- rbind(CostVar,
+ data.frame(
+ name = Names[i],
+ scale = Scale,
+ N = length(Res),
+ SSR.unweighted = sum(Res^2),
+ SSR.unscaled = sum(res^2),
+ SSR = sum(resScaled^2)))
+ } # end loop over all observed variables
+
+ ## SSR
+ Cost <- sum(CostVar$SSR * CostVar$scale)
+
+ Lprob <- -sum(log(pmax(0, dnorm(Residual$mod, Residual$obs, Err)))) # avoid log of negative values
+ #Lprob <- -sum(log(pmax(.Machine$double.xmin, dnorm(Residual$mod, Residual$obs, Err)))) #avoid log(0)
+
+ if (! is.null(cost)) {
+ Cost <- Cost + cost$model
+ CostVar <- rbind(CostVar, cost$var)
+ Residual <- rbind(Residual, cost$residuals)
+ Lprob <- Lprob + cost$minlogp
+ }
+ out <- list(model = Cost, cost = Cost, minlogp = Lprob, var = CostVar, residuals = Residual)
+ class(out) <- "modCost"
+ return(out)
+}
+
+## -----------------------------------------------------------------------------
+## S3 methods of modCost
+## -----------------------------------------------------------------------------
+
+plot.modCost<- function(x, legpos="topleft", ...) {
+ nvar <- nrow(x$var)
+
+ dots <- list(...)
+
+ dots$xlab <- if(is.null(dots$xlab)) "x" else dots$xlab
+ dots$ylab <- if(is.null(dots$ylab)) "weighted residuals" else dots$ylab
+ DotsPch <- if(is.null(dots$pch)) (16:24) else dots$pch
+ dots$pch <- if(is.null(dots$pch)) (16:24)[x$residuals$name] else dots$pch[x$residuals$name]
+ DotsCol <- if(is.null(dots$col)) (1:nvar) else dots$col
+ dots$col <- if(is.null(dots$col)) (1:nvar)[x$residuals$name] else dots$col[x$residuals$name]
+
+ do.call("plot", c(alist(x$residuals$x, x$residuals$res), dots))
+
+# plot(x$residuals$x, x$residuals$res, xlab="x", ylab="weighted residuals",
+# pch=c(16:24)[x$residuals$name],col=c(1:nvar)[x$residuals$name],...)
+
+ if (! is.na(legpos))
+ legend(legpos, legend = x$var$name, col = DotsCol, pch = DotsPch)
+}
+
+## -----------------------------------------------------------------------------
+## Internal cost function for optimisers
+## -----------------------------------------------------------------------------
+# Cost function. The returned structure must have $model
+# Called from the middle of IrlsFit and OlsFit
+# We need to preserve state between calls so make a closure
+CakeInternalCostFunctions <- function(mkinmod, state.ini.optim, state.ini.optim.boxnames,
+ state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar,
+ quiet, plot=FALSE, atol=1e-6){
+ cost.old <- 1e+100
+ calls <- 0
+ out_predicted <- NA
+
+ get.predicted <- function(){ out_predicted }
+
+ get.best.cost <- function(){ cost.old }
+ reset.best.cost <- function() { cost.old<<-1e+100 }
+
+ get.calls <- function(){ calls }
+ set.calls <- function(newcalls){ calls <<- newcalls }
+
+ set.error<-function(err) { observed$err <<- err }
+
+ cost <- function(P) {
+ assign("calls", calls + 1, inherits = TRUE)
+ print(P)
+ if (length(state.ini.optim) > 0) {
+ odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed)
+ names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed))
+ }
+ else odeini <- state.ini.fixed
+ odeparms <- c(P[(length(state.ini.optim) + 1):length(P)],
+ parms.fixed)
+ # Ensure initial state is at time 0
+ outtimes = unique(c(0,observed$time))
+ out <- ode(y = odeini, times = outtimes, func = mkindiff,
+ parms = odeparms, atol=atol)
+ out_transformed <- data.frame(time = out[, "time"])
+ for (var in names(mkinmod$map)) {
+ if (length(mkinmod$map[[var]]) == 1) {
+ out_transformed[var] <- out[, var]
+ }
+ else {
+ out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]])
+ }
+ }
+ assign("out_predicted", out_transformed, inherits = TRUE)
+ mC <- CakeCost(out_transformed, observed, y = "value",
+ err = 'err', scaleVar = scaleVar)
+ mC$penalties <- CakePenalties(odeparms, out_transformed, observed)
+ mC$model <- mC$cost + mC$penalties;
+ if (mC$model < cost.old) {
+ if (!quiet)
+ cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model,
+ "\n")
+ if (plot) {
+ outtimes_plot = seq(min(observed$time), max(observed$time),
+ length.out = 100)
+ out_plot <- ode(y = odeini, times = outtimes_plot,
+ func = mkindiff, parms = odeparms, atol=atol)
+ out_transformed_plot <- data.frame(time = out_plot[,
+ "time"])
+ for (var in names(mkinmod$map)) {
+ if (length(mkinmod$map[[var]]) == 1) {
+ out_transformed_plot[var] <- out_plot[, var]
+ }
+ else {
+ out_transformed_plot[var] <- rowSums(out_plot[,
+ mkinmod$map[[var]]])
+ }
+ }
+ plot(0, type = "n", xlim = range(observed$time),
+ ylim = range(observed$value, na.rm = TRUE),
+ xlab = "Time", ylab = "Observed")
+ col_obs <- pch_obs <- 1:length(obs_vars)
+ names(col_obs) <- names(pch_obs) <- obs_vars
+ for (obs_var in obs_vars) {
+ points(subset(observed, name == obs_var, c(time,
+ value)), pch = pch_obs[obs_var], col = col_obs[obs_var])
+ }
+ matlines(out_transformed_plot$time, out_transformed_plot[-1])
+ legend("topright", inset = c(0.05, 0.05), legend = obs_vars,
+ col = col_obs, pch = pch_obs, lty = 1:length(pch_obs))
+ }
+ assign("cost.old", mC$model, inherits = TRUE)
+ }
+ # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost
+ if(mC$penalties > 0){
+ #cat("Penalty adjustment: ",mC$penalties)
+ mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res)
+ }
+
+ return(mC)
+ }
+
+
+
+ list(cost=cost,
+ get.predicted=get.predicted,
+ get.calls=get.calls, set.calls=set.calls,
+ get.best.cost=get.best.cost, reset.best.cost=reset.best.cost,
+ set.error=set.error
+ )
+} \ No newline at end of file

Contact - Imprint