From be6d42ef636e8e1c9fdcfa6f8738ee10e885d75b Mon Sep 17 00:00:00 2001
From: Johannes Ranke <jranke@uni-bremen.de>
Date: Wed, 18 Oct 2017 10:17:59 +0200
Subject: Version 1.4

---
 CakeCost.R | 348 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 348 insertions(+)
 create mode 100644 CakeCost.R

(limited to 'CakeCost.R')

diff --git a/CakeCost.R b/CakeCost.R
new file mode 100644
index 0000000..1cc3981
--- /dev/null
+++ b/CakeCost.R
@@ -0,0 +1,348 @@
+## -----------------------------------------------------------------------------
+## The model cost and residuals
+## -----------------------------------------------------------------------------
+# The CAKE R modules are based on mkin, 
+# Call to approx is only performed if there are multiple non NA values
+# which should prevent most of the crashes - Rob Nelson (Tessella)
+#
+# Modifications developed by Tessella Plc for Syngenta, Copyright (C) 2011 Syngenta
+# Authors: Rob Nelson, Richard Smith
+# Tessella Project Reference: 6245
+#
+# This program is free software: you can
+# redistribute them and/or modify them under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/># 
+#
+# $Id: CakeCost.R 216 2011-07-05 14:35:03Z nelr $
+
+CakeCost <- function (model, obs, x = "time", y = NULL, err = NULL,
+                     weight = "none", scaleVar = FALSE, cost = NULL, ...) {
+
+  ## convert vector to matrix
+  if (is.vector(obs)) {
+    cn <- names(obs)
+    obs   <- matrix(data = obs, nrow = 1)
+    colnames(obs) <- cn
+  }
+  if (is.vector(model)) {
+    cn <- names(model)
+    model <- matrix(data=model, nrow = 1)
+    colnames(model) <- cn
+  }
+
+  ##=============================================================================
+  ## Observations
+  ##=============================================================================
+
+  ## The position of independent variable(s)
+  ix <- 0
+  if (! is.null(x))  {   # mapping required...
+    ## For now multiple independent variables are not supported...
+    if (length(x) > 1)
+      stop ("multiple independent variables in 'obs' are not yet supported")
+
+    if (! is.character(x))
+      stop ("'x' should be the *name* of the column with the independent variable in 'obs' or NULL")
+    ix  <- which(colnames(obs) %in% x)
+    if (length(ix) != length(x))
+      stop(paste("Independent variable column not found in observations", x))
+  } else ix <- NULL
+
+  ## The position of weighing values
+  ierr <- 0
+  if (! is.null(err)) {
+    if (! is.character(err))
+      stop ("'err' should be the *name* of the column with the error estimates in obs or NULL")
+    ierr <- which(colnames(obs) == err)    # only one
+    if (length(ierr) == 0)
+      stop(paste("Column with error estimates not found in observations", err))
+  }
+
+  ## The dependent variables
+  type <- 1           # data input type: type 2 is table format, type 1 is long format...
+
+  if (!is.null(y)) {   # it is in table format; first column are names of observed data...
+
+    Names    <- as.character(unique(obs[, 1]))  # Names of data sets, all data should be model variables...
+    Ndat     <- length(Names)                   # Number of data sets
+    ilist    <- 1:Ndat
+    if (! is.character(y))
+      stop ("'y' should be the *name* of the column with the values of the dependent variable in obs")
+    iy  <- which(colnames(obs) == y)
+    if (length(iy) == 0)
+      stop(paste("Column with value of dependent variable not found in observations", y))
+    type <- 2
+
+  } else  {             # it is a matrix, variable names are column names
+    Ndat     <- NCOL(obs)-1
+    Names    <- colnames(obs)
+    ilist    <- (1:NCOL(obs))        # column positions of the (dependent) observed variables
+    exclude  <- ix                   # exclude columns that are not
+    if (ierr > 0)
+      exclude <- c(ix, ierr)          # exclude columns that are not
+    if (length(exclude) > 0)
+      ilist <- ilist[-exclude]
+  }
+
+  #================================
+  # The model results
+  #================================
+
+  ModNames <- colnames(model)  # Names of model variables
+  if (length(ix) > 1) {
+    ixMod <- NULL
+
+    for ( i in 1:length(ix)) {
+      ix2 <- which(colnames(model) == x[i])
+      if (length(ix2) == 0)
+        stop(paste("Cannot calculate cost: independent variable not found in model output", x[i]))
+      ixMod <- c(ixMod, ix2)
+    }
+
+  xMod     <- model[,ixMod]    # Independent variable, model
+  } else if (length(ix) == 1) {
+   ixMod    <- which(colnames(model) == x)
+   if (length(ixMod) == 0)
+     stop(paste("Cannot calculate cost: independent variable not found in model output", x))
+   xMod     <- model[,ixMod]    # Independent variable, model
+  }
+  Residual <- NULL
+  CostVar  <- NULL
+
+  #================================
+  # Compare model and data...
+  #================================
+  xDat <- 0
+  iDat <- 1:nrow(obs)
+
+  for (i in ilist) {   # for each observed variable ...
+    ii <- which(ModNames == Names[i])
+    if (length(ii) == 0) stop(paste("observed variable not found in model output", Names[i]))
+    yMod <- model[,ii]
+    if (type == 2)  {  # table format
+      iDat <- which (obs[,1] == Names[i])
+      if (length(ix) > 0) xDat <- obs[iDat, ix]
+      obsdat <- obs[iDat, iy]
+    } else {
+      if (length(ix) > 0) xDat <- obs[, 1]
+      obsdat <- obs[,i]
+    }
+    ii <- which(is.na(obsdat))
+    if (length(ii) > 0) {
+      xDat   <- xDat[-ii]
+      obsdat <- obsdat[-ii]
+    }
+
+    # CAKE version - Added tests for multiple non-NA values 
+    if (length(ix) > 0 && length(unique(xMod[!is.na(xMod)]))>1 && length(yMod[!is.na(yMod)])>1)
+    {
+      ModVar <- approx(xMod, yMod, xout = xDat)$y
+    }
+    else {
+      cat("CakeCost Warning: Only one valid point - using mean (yMod was", yMod, ")\n")
+      ModVar <- mean(yMod[!is.na(yMod)])
+      obsdat <- mean(obsdat)
+    }
+    iex <- which(!is.na(ModVar))
+    ModVar <- ModVar[iex]
+    obsdat <- obsdat[iex]
+    xDat   <- xDat[iex]
+    if (ierr > 0) {
+      Err <- obs[iDat, ierr]
+      Err <- Err[iex]
+    } else {
+      if (weight == "std")
+        Err <- sd(obsdat)
+      else if (weight == "mean")
+        Err <- mean(abs(obsdat))
+      else if (weight == "none")
+        Err <- 1
+      else
+       stop("error: do not recognize 'weight'; should be one of 'none', 'std', 'mean'")
+    }
+    if (any(is.na(Err)))
+      stop(paste("error: cannot estimate weighing for observed variable: ", Names[i]))
+    if (min(Err) <= 0)
+      stop(paste("error: weighing for observed variable is 0 or negative:", Names[i]))
+    if (scaleVar)
+      Scale <- 1/length(obsdat)
+    else Scale <- 1
+    Res <- (ModVar- obsdat)
+    res <- Res/Err
+    resScaled <- res*Scale
+    Residual <- rbind(Residual,
+                      data.frame(
+                        name   = Names[i],
+                        x      = xDat,
+                        obs    = obsdat,
+                        mod    = ModVar,
+                        weight = 1/Err,
+                        res.unweighted = Res,
+                        res    = res))
+
+    CostVar <- rbind(CostVar,
+                     data.frame(
+                       name           = Names[i],
+                       scale          = Scale,
+                       N              = length(Res),
+                       SSR.unweighted = sum(Res^2),
+                       SSR.unscaled   = sum(res^2),
+                       SSR            = sum(resScaled^2)))
+  }  # end loop over all observed variables
+
+  ## SSR
+  Cost  <- sum(CostVar$SSR * CostVar$scale)
+  
+  Lprob <- -sum(log(pmax(0, dnorm(Residual$mod, Residual$obs, Err)))) # avoid log of negative values
+  #Lprob <- -sum(log(pmax(.Machine$double.xmin, dnorm(Residual$mod, Residual$obs, Err)))) #avoid log(0)
+
+  if (! is.null(cost)) {
+    Cost     <- Cost + cost$model
+    CostVar  <- rbind(CostVar, cost$var)
+    Residual <- rbind(Residual, cost$residuals)
+    Lprob    <- Lprob + cost$minlogp
+  }
+  out <- list(model = Cost, cost = Cost, minlogp = Lprob, var = CostVar, residuals = Residual)
+  class(out) <- "modCost"
+  return(out)
+}
+
+## -----------------------------------------------------------------------------
+## S3 methods of modCost
+## -----------------------------------------------------------------------------
+
+plot.modCost<- function(x, legpos="topleft", ...) {
+  nvar <- nrow(x$var)
+
+  dots <- list(...)
+
+  dots$xlab <- if(is.null(dots$xlab)) "x" else dots$xlab
+  dots$ylab <- if(is.null(dots$ylab)) "weighted residuals" else dots$ylab
+  DotsPch   <- if(is.null(dots$pch)) (16:24) else dots$pch
+  dots$pch  <- if(is.null(dots$pch)) (16:24)[x$residuals$name] else dots$pch[x$residuals$name]
+  DotsCol   <- if(is.null(dots$col)) (1:nvar) else dots$col
+  dots$col  <- if(is.null(dots$col)) (1:nvar)[x$residuals$name] else dots$col[x$residuals$name]
+
+  do.call("plot", c(alist(x$residuals$x, x$residuals$res), dots))
+
+#  plot(x$residuals$x, x$residuals$res, xlab="x", ylab="weighted residuals",
+#     pch=c(16:24)[x$residuals$name],col=c(1:nvar)[x$residuals$name],...)
+
+  if (! is.na(legpos))
+    legend(legpos, legend = x$var$name, col = DotsCol, pch = DotsPch)
+}
+
+## -----------------------------------------------------------------------------
+## Internal cost function for optimisers
+## -----------------------------------------------------------------------------
+# Cost function. The returned structure must have $model
+# Called from the middle of IrlsFit and OlsFit
+# We need to preserve state between calls so make a closure
+CakeInternalCostFunctions <- function(mkinmod, state.ini.optim, state.ini.optim.boxnames, 
+                                    state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, 
+                                    quiet, plot=FALSE, atol=1e-6){
+    cost.old <- 1e+100
+    calls <- 0
+    out_predicted <- NA
+	
+	get.predicted <- function(){ out_predicted }
+	
+	get.best.cost <- function(){ cost.old }
+	reset.best.cost <- function() { cost.old<<-1e+100 }
+	
+	get.calls <- function(){ calls }
+	set.calls <- function(newcalls){ calls <<- newcalls }
+	
+	set.error<-function(err) { observed$err <<- err }
+	
+	cost <- function(P) {
+		assign("calls", calls + 1, inherits = TRUE)
+		print(P)
+		if (length(state.ini.optim) > 0) {
+			odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed)
+			names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed))
+		}
+		else odeini <- state.ini.fixed
+		odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], 
+			parms.fixed)
+		# Ensure initial state is at time 0
+		outtimes = unique(c(0,observed$time))
+		out <- ode(y = odeini, times = outtimes, func = mkindiff, 
+			parms = odeparms, atol=atol)
+		out_transformed <- data.frame(time = out[, "time"])
+		for (var in names(mkinmod$map)) {
+			if (length(mkinmod$map[[var]]) == 1) {
+				out_transformed[var] <- out[, var]
+			}
+			else {
+				out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]])
+			}
+		}
+		assign("out_predicted", out_transformed, inherits = TRUE)
+		mC <- CakeCost(out_transformed, observed, y = "value", 
+			err = 'err', scaleVar = scaleVar)
+		mC$penalties <- CakePenalties(odeparms, out_transformed, observed)
+		mC$model <- mC$cost + mC$penalties;
+		if (mC$model < cost.old) {
+			if (!quiet) 
+				cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model,
+				  "\n")
+			if (plot) {
+				outtimes_plot = seq(min(observed$time), max(observed$time), 
+				  length.out = 100)
+				out_plot <- ode(y = odeini, times = outtimes_plot, 
+				  func = mkindiff, parms = odeparms, atol=atol)
+				out_transformed_plot <- data.frame(time = out_plot[, 
+				  "time"])
+				for (var in names(mkinmod$map)) {
+				  if (length(mkinmod$map[[var]]) == 1) {
+					out_transformed_plot[var] <- out_plot[, var]
+				  }
+				  else {
+					out_transformed_plot[var] <- rowSums(out_plot[, 
+					  mkinmod$map[[var]]])
+				  }
+				}
+				plot(0, type = "n", xlim = range(observed$time), 
+				  ylim = range(observed$value, na.rm = TRUE), 
+				  xlab = "Time", ylab = "Observed")
+				col_obs <- pch_obs <- 1:length(obs_vars)
+				names(col_obs) <- names(pch_obs) <- obs_vars
+				for (obs_var in obs_vars) {
+				  points(subset(observed, name == obs_var, c(time, 
+					value)), pch = pch_obs[obs_var], col = col_obs[obs_var])
+				}
+				matlines(out_transformed_plot$time, out_transformed_plot[-1])
+				legend("topright", inset = c(0.05, 0.05), legend = obs_vars, 
+				  col = col_obs, pch = pch_obs, lty = 1:length(pch_obs))
+			}
+			assign("cost.old", mC$model, inherits = TRUE)
+		}
+		# HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost
+		if(mC$penalties > 0){
+			#cat("Penalty adjustment: ",mC$penalties)
+			mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res)
+		}
+		
+		return(mC)
+	}
+		
+
+	
+	list(cost=cost, 
+		get.predicted=get.predicted,
+		get.calls=get.calls, set.calls=set.calls,
+		get.best.cost=get.best.cost, reset.best.cost=reset.best.cost,
+		set.error=set.error
+	)
+}
\ No newline at end of file
-- 
cgit v1.2.1