diff options
Diffstat (limited to 'CakeCost.R')
-rw-r--r-- | CakeCost.R | 348 |
1 files changed, 348 insertions, 0 deletions
diff --git a/CakeCost.R b/CakeCost.R new file mode 100644 index 0000000..1cc3981 --- /dev/null +++ b/CakeCost.R @@ -0,0 +1,348 @@ +## ----------------------------------------------------------------------------- +## The model cost and residuals +## ----------------------------------------------------------------------------- +# The CAKE R modules are based on mkin, +# Call to approx is only performed if there are multiple non NA values +# which should prevent most of the crashes - Rob Nelson (Tessella) +# +# Modifications developed by Tessella Plc for Syngenta, Copyright (C) 2011 Syngenta +# Authors: Rob Nelson, Richard Smith +# Tessella Project Reference: 6245 +# +# This program is free software: you can +# redistribute them and/or modify them under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <http://www.gnu.org/licenses/># +# +# $Id: CakeCost.R 216 2011-07-05 14:35:03Z nelr $ + +CakeCost <- function (model, obs, x = "time", y = NULL, err = NULL, + weight = "none", scaleVar = FALSE, cost = NULL, ...) { + + ## convert vector to matrix + if (is.vector(obs)) { + cn <- names(obs) + obs <- matrix(data = obs, nrow = 1) + colnames(obs) <- cn + } + if (is.vector(model)) { + cn <- names(model) + model <- matrix(data=model, nrow = 1) + colnames(model) <- cn + } + + ##============================================================================= + ## Observations + ##============================================================================= + + ## The position of independent variable(s) + ix <- 0 + if (! is.null(x)) { # mapping required... + ## For now multiple independent variables are not supported... + if (length(x) > 1) + stop ("multiple independent variables in 'obs' are not yet supported") + + if (! is.character(x)) + stop ("'x' should be the *name* of the column with the independent variable in 'obs' or NULL") + ix <- which(colnames(obs) %in% x) + if (length(ix) != length(x)) + stop(paste("Independent variable column not found in observations", x)) + } else ix <- NULL + + ## The position of weighing values + ierr <- 0 + if (! is.null(err)) { + if (! is.character(err)) + stop ("'err' should be the *name* of the column with the error estimates in obs or NULL") + ierr <- which(colnames(obs) == err) # only one + if (length(ierr) == 0) + stop(paste("Column with error estimates not found in observations", err)) + } + + ## The dependent variables + type <- 1 # data input type: type 2 is table format, type 1 is long format... + + if (!is.null(y)) { # it is in table format; first column are names of observed data... + + Names <- as.character(unique(obs[, 1])) # Names of data sets, all data should be model variables... + Ndat <- length(Names) # Number of data sets + ilist <- 1:Ndat + if (! is.character(y)) + stop ("'y' should be the *name* of the column with the values of the dependent variable in obs") + iy <- which(colnames(obs) == y) + if (length(iy) == 0) + stop(paste("Column with value of dependent variable not found in observations", y)) + type <- 2 + + } else { # it is a matrix, variable names are column names + Ndat <- NCOL(obs)-1 + Names <- colnames(obs) + ilist <- (1:NCOL(obs)) # column positions of the (dependent) observed variables + exclude <- ix # exclude columns that are not + if (ierr > 0) + exclude <- c(ix, ierr) # exclude columns that are not + if (length(exclude) > 0) + ilist <- ilist[-exclude] + } + + #================================ + # The model results + #================================ + + ModNames <- colnames(model) # Names of model variables + if (length(ix) > 1) { + ixMod <- NULL + + for ( i in 1:length(ix)) { + ix2 <- which(colnames(model) == x[i]) + if (length(ix2) == 0) + stop(paste("Cannot calculate cost: independent variable not found in model output", x[i])) + ixMod <- c(ixMod, ix2) + } + + xMod <- model[,ixMod] # Independent variable, model + } else if (length(ix) == 1) { + ixMod <- which(colnames(model) == x) + if (length(ixMod) == 0) + stop(paste("Cannot calculate cost: independent variable not found in model output", x)) + xMod <- model[,ixMod] # Independent variable, model + } + Residual <- NULL + CostVar <- NULL + + #================================ + # Compare model and data... + #================================ + xDat <- 0 + iDat <- 1:nrow(obs) + + for (i in ilist) { # for each observed variable ... + ii <- which(ModNames == Names[i]) + if (length(ii) == 0) stop(paste("observed variable not found in model output", Names[i])) + yMod <- model[,ii] + if (type == 2) { # table format + iDat <- which (obs[,1] == Names[i]) + if (length(ix) > 0) xDat <- obs[iDat, ix] + obsdat <- obs[iDat, iy] + } else { + if (length(ix) > 0) xDat <- obs[, 1] + obsdat <- obs[,i] + } + ii <- which(is.na(obsdat)) + if (length(ii) > 0) { + xDat <- xDat[-ii] + obsdat <- obsdat[-ii] + } + + # CAKE version - Added tests for multiple non-NA values + if (length(ix) > 0 && length(unique(xMod[!is.na(xMod)]))>1 && length(yMod[!is.na(yMod)])>1) + { + ModVar <- approx(xMod, yMod, xout = xDat)$y + } + else { + cat("CakeCost Warning: Only one valid point - using mean (yMod was", yMod, ")\n") + ModVar <- mean(yMod[!is.na(yMod)]) + obsdat <- mean(obsdat) + } + iex <- which(!is.na(ModVar)) + ModVar <- ModVar[iex] + obsdat <- obsdat[iex] + xDat <- xDat[iex] + if (ierr > 0) { + Err <- obs[iDat, ierr] + Err <- Err[iex] + } else { + if (weight == "std") + Err <- sd(obsdat) + else if (weight == "mean") + Err <- mean(abs(obsdat)) + else if (weight == "none") + Err <- 1 + else + stop("error: do not recognize 'weight'; should be one of 'none', 'std', 'mean'") + } + if (any(is.na(Err))) + stop(paste("error: cannot estimate weighing for observed variable: ", Names[i])) + if (min(Err) <= 0) + stop(paste("error: weighing for observed variable is 0 or negative:", Names[i])) + if (scaleVar) + Scale <- 1/length(obsdat) + else Scale <- 1 + Res <- (ModVar- obsdat) + res <- Res/Err + resScaled <- res*Scale + Residual <- rbind(Residual, + data.frame( + name = Names[i], + x = xDat, + obs = obsdat, + mod = ModVar, + weight = 1/Err, + res.unweighted = Res, + res = res)) + + CostVar <- rbind(CostVar, + data.frame( + name = Names[i], + scale = Scale, + N = length(Res), + SSR.unweighted = sum(Res^2), + SSR.unscaled = sum(res^2), + SSR = sum(resScaled^2))) + } # end loop over all observed variables + + ## SSR + Cost <- sum(CostVar$SSR * CostVar$scale) + + Lprob <- -sum(log(pmax(0, dnorm(Residual$mod, Residual$obs, Err)))) # avoid log of negative values + #Lprob <- -sum(log(pmax(.Machine$double.xmin, dnorm(Residual$mod, Residual$obs, Err)))) #avoid log(0) + + if (! is.null(cost)) { + Cost <- Cost + cost$model + CostVar <- rbind(CostVar, cost$var) + Residual <- rbind(Residual, cost$residuals) + Lprob <- Lprob + cost$minlogp + } + out <- list(model = Cost, cost = Cost, minlogp = Lprob, var = CostVar, residuals = Residual) + class(out) <- "modCost" + return(out) +} + +## ----------------------------------------------------------------------------- +## S3 methods of modCost +## ----------------------------------------------------------------------------- + +plot.modCost<- function(x, legpos="topleft", ...) { + nvar <- nrow(x$var) + + dots <- list(...) + + dots$xlab <- if(is.null(dots$xlab)) "x" else dots$xlab + dots$ylab <- if(is.null(dots$ylab)) "weighted residuals" else dots$ylab + DotsPch <- if(is.null(dots$pch)) (16:24) else dots$pch + dots$pch <- if(is.null(dots$pch)) (16:24)[x$residuals$name] else dots$pch[x$residuals$name] + DotsCol <- if(is.null(dots$col)) (1:nvar) else dots$col + dots$col <- if(is.null(dots$col)) (1:nvar)[x$residuals$name] else dots$col[x$residuals$name] + + do.call("plot", c(alist(x$residuals$x, x$residuals$res), dots)) + +# plot(x$residuals$x, x$residuals$res, xlab="x", ylab="weighted residuals", +# pch=c(16:24)[x$residuals$name],col=c(1:nvar)[x$residuals$name],...) + + if (! is.na(legpos)) + legend(legpos, legend = x$var$name, col = DotsCol, pch = DotsPch) +} + +## ----------------------------------------------------------------------------- +## Internal cost function for optimisers +## ----------------------------------------------------------------------------- +# Cost function. The returned structure must have $model +# Called from the middle of IrlsFit and OlsFit +# We need to preserve state between calls so make a closure +CakeInternalCostFunctions <- function(mkinmod, state.ini.optim, state.ini.optim.boxnames, + state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, + quiet, plot=FALSE, atol=1e-6){ + cost.old <- 1e+100 + calls <- 0 + out_predicted <- NA + + get.predicted <- function(){ out_predicted } + + get.best.cost <- function(){ cost.old } + reset.best.cost <- function() { cost.old<<-1e+100 } + + get.calls <- function(){ calls } + set.calls <- function(newcalls){ calls <<- newcalls } + + set.error<-function(err) { observed$err <<- err } + + cost <- function(P) { + assign("calls", calls + 1, inherits = TRUE) + print(P) + if (length(state.ini.optim) > 0) { + odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) + names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) + } + else odeini <- state.ini.fixed + odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], + parms.fixed) + # Ensure initial state is at time 0 + outtimes = unique(c(0,observed$time)) + out <- ode(y = odeini, times = outtimes, func = mkindiff, + parms = odeparms, atol=atol) + out_transformed <- data.frame(time = out[, "time"]) + for (var in names(mkinmod$map)) { + if (length(mkinmod$map[[var]]) == 1) { + out_transformed[var] <- out[, var] + } + else { + out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) + } + } + assign("out_predicted", out_transformed, inherits = TRUE) + mC <- CakeCost(out_transformed, observed, y = "value", + err = 'err', scaleVar = scaleVar) + mC$penalties <- CakePenalties(odeparms, out_transformed, observed) + mC$model <- mC$cost + mC$penalties; + if (mC$model < cost.old) { + if (!quiet) + cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, + "\n") + if (plot) { + outtimes_plot = seq(min(observed$time), max(observed$time), + length.out = 100) + out_plot <- ode(y = odeini, times = outtimes_plot, + func = mkindiff, parms = odeparms, atol=atol) + out_transformed_plot <- data.frame(time = out_plot[, + "time"]) + for (var in names(mkinmod$map)) { + if (length(mkinmod$map[[var]]) == 1) { + out_transformed_plot[var] <- out_plot[, var] + } + else { + out_transformed_plot[var] <- rowSums(out_plot[, + mkinmod$map[[var]]]) + } + } + plot(0, type = "n", xlim = range(observed$time), + ylim = range(observed$value, na.rm = TRUE), + xlab = "Time", ylab = "Observed") + col_obs <- pch_obs <- 1:length(obs_vars) + names(col_obs) <- names(pch_obs) <- obs_vars + for (obs_var in obs_vars) { + points(subset(observed, name == obs_var, c(time, + value)), pch = pch_obs[obs_var], col = col_obs[obs_var]) + } + matlines(out_transformed_plot$time, out_transformed_plot[-1]) + legend("topright", inset = c(0.05, 0.05), legend = obs_vars, + col = col_obs, pch = pch_obs, lty = 1:length(pch_obs)) + } + assign("cost.old", mC$model, inherits = TRUE) + } + # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost + if(mC$penalties > 0){ + #cat("Penalty adjustment: ",mC$penalties) + mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res) + } + + return(mC) + } + + + + list(cost=cost, + get.predicted=get.predicted, + get.calls=get.calls, set.calls=set.calls, + get.best.cost=get.best.cost, reset.best.cost=reset.best.cost, + set.error=set.error + ) +}
\ No newline at end of file |