## -----------------------------------------------------------------------------
## The model cost and residuals
## -----------------------------------------------------------------------------
# The CAKE R modules are based on mkin,
# Call to approx is only performed if there are multiple non NA values
# which should prevent most of the crashes - Rob Nelson (Tessella)
#
# Modifications developed by Tessella Plc for Syngenta, Copyright (C) 2011 Syngenta
# Authors: Rob Nelson, Richard Smith
# Tessella Project Reference: 6245
#
# This program is free software: you can
# redistribute them and/or modify them under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>#
#
# $Id$
CakeCost <- function (model, obs, x = "time", y = NULL, err = NULL,
weight = "none", scaleVar = FALSE, cost = NULL, ...) {
## convert vector to matrix
if (is.vector(obs)) {
cn <- names(obs)
obs <- matrix(data = obs, nrow = 1)
colnames(obs) <- cn
}
if (is.vector(model)) {
cn <- names(model)
model <- matrix(data=model, nrow = 1)
colnames(model) <- cn
}
##=============================================================================
## Observations
##=============================================================================
## The position of independent variable(s)
ix <- 0
if (! is.null(x)) { # mapping required...
## For now multiple independent variables are not supported...
if (length(x) > 1)
stop ("multiple independent variables in 'obs' are not yet supported")
if (! is.character(x))
stop ("'x' should be the *name* of the column with the independent variable in 'obs' or NULL")
ix <- which(colnames(obs) %in% x)
if (length(ix) != length(x))
stop(paste("Independent variable column not found in observations", x))
} else ix <- NULL
## The position of weighing values
ierr <- 0
if (! is.null(err)) {
if (! is.character(err))
stop ("'err' should be the *name* of the column with the error estimates in obs or NULL")
ierr <- which(colnames(obs) == err) # only one
if (length(ierr) == 0)
stop(paste("Column with error estimates not found in observations", err))
}
## The dependent variables
type <- 1 # data input type: type 2 is table format, type 1 is long format...
if (!is.null(y)) { # it is in table format; first column are names of observed data...
Names <- as.character(unique(obs[, 1])) # Names of data sets, all data should be model variables...
Ndat <- length(Names) # Number of data sets
ilist <- 1:Ndat
if (! is.character(y))
stop ("'y' should be the *name* of the column with the values of the dependent variable in obs")
iy <- which(colnames(obs) == y)
if (length(iy) == 0)
stop(paste("Column with value of dependent variable not found in observations", y))
type <- 2
} else { # it is a matrix, variable names are column names
Ndat <- NCOL(obs)-1
Names <- colnames(obs)
ilist <- (1:NCOL(obs)) # column positions of the (dependent) observed variables
exclude <- ix # exclude columns that are not
if (ierr > 0)
exclude <- c(ix, ierr) # exclude columns that are not
if (length(exclude) > 0)
ilist <- ilist[-exclude]
}
#================================
# The model results
#================================
ModNames <- colnames(model) # Names of model variables
if (length(ix) > 1) {
ixMod <- NULL
for ( i in 1:length(ix)) {
ix2 <- which(colnames(model) == x[i])
if (length(ix2) == 0)
stop(paste("Cannot calculate cost: independent variable not found in model output", x[i]))
ixMod <- c(ixMod, ix2)
}
xMod <- model[,ixMod] # Independent variable, model
} else if (length(ix) == 1) {
ixMod <- which(colnames(model) == x)
if (length(ixMod) == 0)
stop(paste("Cannot calculate cost: independent variable not found in model output", x))
xMod <- model[,ixMod] # Independent variable, model
}
Residual <- NULL
CostVar <- NULL
#================================
# Compare model and data...
#================================
xDat <- 0
iDat <- 1:nrow(obs)
for (i in ilist) { # for each observed variable ...
ii <- which(ModNames == Names[i])
if (length(ii) == 0) stop(paste("observed variable not found in model output", Names[i]))
yMod <- model[,ii]
if (type == 2) { # table format
iDat <- which (obs[,1] == Names[i])
if (length(ix) > 0) xDat <- obs[iDat, ix]
obsdat <- obs[iDat, iy]
} else {
if (length(ix) > 0) xDat <- obs[, 1]
obsdat <- obs[,i]
}
ii <- which(is.na(obsdat))
if (length(ii) > 0) {
xDat <- xDat[-ii]
obsdat <- obsdat[-ii]
}
# CAKE version - Added tests for multiple non-NA values
if (length(ix) > 0 && length(unique(xMod[!is.na(xMod)]))>1 && length(yMod[!is.na(yMod)])>1)
{
ModVar <- approx(xMod, yMod, xout = xDat)$y
}
else {
cat("CakeCost Warning: Only one valid point - using mean (yMod was", yMod, ")\n")
ModVar <- mean(yMod[!is.na(yMod)])
obsdat <- mean(obsdat)
}
iex <- which(!is.na(ModVar))
ModVar <- ModVar[iex]
obsdat <- obsdat[iex]
xDat <- xDat[iex]
if (ierr > 0) {
Err <- obs[iDat, ierr]
Err <- Err[iex]
} else {
if (weight == "std")
Err <- sd(obsdat)
else if (weight == "mean")
Err <- mean(abs(obsdat))
else if (weight == "none")
Err <- 1
else
stop("error: do not recognize 'weight'; should be one of 'none', 'std', 'mean'")
}
if (any(is.na(Err)))
stop(paste("error: cannot estimate weighing for observed variable: ", Names[i]))
if (min(Err) <= 0)
stop(paste("error: weighing for observed variable is 0 or negative:", Names[i]))
if (scaleVar)
Scale <- 1/length(obsdat)
else Scale <- 1
Res <- (ModVar- obsdat)
res <- Res/Err
resScaled <- res*Scale
# print("===========================");
# print("#Names[i]");
# print(Names[i]);
# print("xDat");
# print(xDat);
# print("obsdat");
# print(obsdat);
# print("ModVar");
# print(ModVar);
# print("1/Err");
# print(1/Err);
# print("Res");
# print(Res);
# print("res");
# print(res);
# print("===========================");
Residual <- rbind(Residual,
data.frame(
name = Names[i],
x = xDat,
obs = obsdat,
mod = ModVar,
weight = 1/Err,
res.unweighted = Res,
res = res))
CostVar <- rbind(CostVar,
data.frame(
name = Names[i],
scale = Scale,
N = length(Res),
SSR.unweighted = sum(Res^2),
SSR.unscaled = sum(res^2),
SSR = sum(resScaled^2)))
# print("************************")
# print(CostVar)
# print("************************")
} # end loop over all observed variables
## SSR
Cost <- sum(CostVar$SSR * CostVar$scale)
Lprob <- -sum(log(pmax(0, dnorm(Residual$mod, Residual$obs, Err)))) # avoid log of negative values
#Lprob <- -sum(log(pmax(.Machine$double.xmin, dnorm(Residual$mod, Residual$obs, Err)))) #avoid log(0)
if (! is.null(cost)) {
Cost <- Cost + cost$model
CostVar <- rbind(CostVar, cost$var)
Residual <- rbind(Residual, cost$residuals)
Lprob <- Lprob + cost$minlogp
}
out <- list(model = Cost, cost = Cost, minlogp = Lprob, var = CostVar, residuals = Residual)
class(out) <- "modCost"
return(out)
}
## -----------------------------------------------------------------------------
## S3 methods of modCost
## -----------------------------------------------------------------------------
plot.modCost<- function(x, legpos="topleft", ...) {
nvar <- nrow(x$var)
dots <- list(...)
dots$xlab <- if(is.null(dots$xlab)) "x" else dots$xlab
dots$ylab <- if(is.null(dots$ylab)) "weighted residuals" else dots$ylab
DotsPch <- if(is.null(dots$pch)) (16:24) else dots$pch
dots$pch <- if(is.null(dots$pch)) (16:24)[x$residuals$name] else dots$pch[x$residuals$name]
DotsCol <- if(is.null(dots$col)) (1:nvar) else dots$col
dots$col <- if(is.null(dots$col)) (1:nvar)[x$residuals$name] else dots$col[x$residuals$name]
do.call("plot", c(alist(x$residuals$x, x$residuals$res), dots))
# plot(x$residuals$x, x$residuals$res, xlab="x", ylab="weighted residuals",
# pch=c(16:24)[x$residuals$name],col=c(1:nvar)[x$residuals$name],...)
if (! is.na(legpos))
legend(legpos, legend = x$var$name, col = DotsCol, pch = DotsPch)
}
## -----------------------------------------------------------------------------
## Internal cost function for optimisers
## -----------------------------------------------------------------------------
# Cost function. The returned structure must have $model
# Called from the middle of IrlsFit and OlsFit
# We need to preserve state between calls so make a closure
CakeInternalCostFunctions <- function(mkinmod, state.ini.optim, state.ini.optim.boxnames,
state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar,
quiet, plot=FALSE, atol=1e-6){
cost.old <- 1e+100
calls <- 0
out_predicted <- NA
get.predicted <- function(){ out_predicted }
get.best.cost <- function(){ cost.old }
reset.best.cost <- function() { cost.old<<-1e+100 }
get.calls <- function(){ calls }
set.calls <- function(newcalls){ calls <<- newcalls }
set.error<-function(err) { observed$err <<- err }
cost <- function(P) {
assign("calls", calls + 1, inherits = TRUE)
print(P)
if (length(state.ini.optim) > 0) {
odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed)
names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed))
}
else odeini <- state.ini.fixed
odeparms <- c(P[(length(state.ini.optim) + 1):length(P)],
parms.fixed)
# Ensure initial state is at time 0
outtimes = unique(c(0,observed$time))
out <- ode(y = odeini, times = outtimes, func = mkindiff,
parms = odeparms, atol=atol)
out_transformed <- data.frame(time = out[, "time"])
for (var in names(mkinmod$map)) {
if (length(mkinmod$map[[var]]) == 1) {
out_transformed[var] <- out[, var]
}
else {
out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]])
}
}
assign("out_predicted", out_transformed, inherits = TRUE)
mC <- CakeCost(out_transformed, observed, y = "value",
err = 'err', scaleVar = scaleVar)
mC$penalties <- CakePenalties(odeparms, out_transformed, observed)
mC$model <- mC$cost + mC$penalties;
if (mC$model < cost.old) {
if (!quiet)
cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model,
"\n")
if (plot) {
outtimes_plot = seq(min(observed$time), max(observed$time),
length.out = 100)
out_plot <- ode(y = odeini, times = outtimes_plot,
func = mkindiff, parms = odeparms, atol=atol)
out_transformed_plot <- data.frame(time = out_plot[,
"time"])
for (var in names(mkinmod$map)) {
if (length(mkinmod$map[[var]]) == 1) {
out_transformed_plot[var] <- out_plot[, var]
}
else {
out_transformed_plot[var] <- rowSums(out_plot[,
mkinmod$map[[var]]])
}
}
plot(0, type = "n", xlim = range(observed$time),
ylim = range(observed$value, na.rm = TRUE),
xlab = "Time", ylab = "Observed")
col_obs <- pch_obs <- 1:length(obs_vars)
names(col_obs) <- names(pch_obs) <- obs_vars
for (obs_var in obs_vars) {
points(subset(observed, name == obs_var, c(time,
value)), pch = pch_obs[obs_var], col = col_obs[obs_var])
}
matlines(out_transformed_plot$time, out_transformed_plot[-1])
legend("topright", inset = c(0.05, 0.05), legend = obs_vars,
col = col_obs, pch = pch_obs, lty = 1:length(pch_obs))
}
assign("cost.old", mC$model, inherits = TRUE)
}
# HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost
if(mC$penalties > 0){
#cat("Penalty adjustment: ",mC$penalties)
mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res)
}
return(mC)
}
list(cost=cost,
get.predicted=get.predicted,
get.calls=get.calls, set.calls=set.calls,
get.best.cost=get.best.cost, reset.best.cost=reset.best.cost,
set.error=set.error
)
}