## -----------------------------------------------------------------------------
## The model cost and residuals
## -----------------------------------------------------------------------------
# Some of the CAKE R modules are based on mkin.
# Call to approx is only performed if there are multiple non NA values
# which should prevent most of the crashes - Rob Nelson (Tessella)
#
# Modifications developed by Hybrid Intelligence (formerly Tessella), part of
# Capgemini Engineering, for Syngenta, Copyright (C) 2011-2022 Syngenta
# Tessella Project Reference: 6245, 7247, 8361, 7414, 10091
#
# The CAKE R modules are free software: you can
# redistribute them and/or modify them under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>
CakeCost <- function (model, obs, x = "time", y = NULL, err = NULL,
weight = "none", scaleVar = FALSE, cost = NULL, ...) {
## Sometimes a fit is encountered for which the model is unable to calculate
## values on the full range of observed values. In this case, we will return
## an infinite cost to ensure this value is not selected.
modelCalculatedFully <- all(unlist(obs[x]) %in% unlist(model[x]))
## convert vector to matrix
if (is.vector(obs)) {
cn <- names(obs)
obs <- matrix(data = obs, nrow = 1)
colnames(obs) <- cn
}
if (is.vector(model)) {
cn <- names(model)
model <- matrix(data=model, nrow = 1)
colnames(model) <- cn
}
##=============================================================================
## Observations
##=============================================================================
## The position of independent variable(s)
ix <- 0
if (! is.null(x)) { # mapping required...
## For now multiple independent variables are not supported...
if (length(x) > 1)
stop ("multiple independent variables in 'obs' are not yet supported")
if (! is.character(x))
stop ("'x' should be the *name* of the column with the independent variable in 'obs' or NULL")
ix <- which(colnames(obs) %in% x)
if (length(ix) != length(x))
stop(paste("Independent variable column not found in observations", x))
} else ix <- NULL
## The position of weighing values
ierr <- 0
if (! is.null(err)) {
if (! is.character(err))
stop ("'err' should be the *name* of the column with the error estimates in obs or NULL")
ierr <- which(colnames(obs) == err) # only one
if (length(ierr) == 0)
stop(paste("Column with error estimates not found in observations", err))
}
## The dependent variables
type <- 1 # data input type: type 2 is table format, type 1 is long format...
if (!is.null(y)) { # it is in table format; first column are names of observed data...
Names <- as.character(unique(obs[, 1])) # Names of data sets, all data should be model variables...
Ndat <- length(Names) # Number of data sets
ilist <- 1:Ndat
if (! is.character(y))
stop ("'y' should be the *name* of the column with the values of the dependent variable in obs")
iy <- which(colnames(obs) == y)
if (length(iy) == 0)
stop(paste("Column with value of dependent variable not found in observations", y))
type <- 2
} else { # it is a matrix, variable names are column names
Ndat <- NCOL(obs)-1
Names <- colnames(obs)
ilist <- (1:NCOL(obs)) # column positions of the (dependent) observed variables
exclude <- ix # exclude columns that are not
if (ierr > 0)
exclude <- c(ix, ierr) # exclude columns that are not
if (length(exclude) > 0)
ilist <- ilist[-exclude]
}
#================================
# The model results
#================================
ModNames <- colnames(model) # Names of model variables
if (length(ix) > 1) {
ixMod <- NULL
for ( i in 1:length(ix)) {
ix2 <- which(colnames(model) == x[i])
if (length(ix2) == 0)
stop(paste("Cannot calculate cost: independent variable not found in model output", x[i]))
ixMod <- c(ixMod, ix2)
}
xMod <- model[,ixMod] # Independent variable, model
} else if (length(ix) == 1) {
ixMod <- which(colnames(model) == x)
if (length(ixMod) == 0)
stop(paste("Cannot calculate cost: independent variable not found in model output", x))
xMod <- model[,ixMod] # Independent variable, model
}
Residual <- NULL
CostVar <- NULL
#================================
# Compare model and data...
#================================
xDat <- 0
iDat <- 1:nrow(obs)
for (i in ilist) { # for each observed variable ...
ii <- which(ModNames == Names[i])
if (length(ii) == 0) stop(paste("observed variable not found in model output", Names[i]))
yMod <- model[, ii]
if (type == 2) { # table format
iDat <- which (obs[,1] == Names[i])
if (length(ix) > 0) xDat <- obs[iDat, ix]
obsdat <- obs[iDat, iy]
} else {
if (length(ix) > 0) xDat <- obs[, 1]
obsdat <- obs[,i]
}
ii <- which(is.na(obsdat))
if (length(ii) > 0) {
xDat <- xDat[-ii]
obsdat <- obsdat[-ii]
}
# CAKE version - Added tests for multiple non-NA values
if (length(ix) > 0 && length(unique(xMod[!is.na(xMod)]))>1 && length(yMod[!is.na(yMod)])>1)
{
ModVar <- approx(xMod, yMod, xout = xDat)$y
}
else {
cat("CakeCost Warning: Only one valid point - using mean (yMod was", yMod, ")\n")
ModVar <- mean(yMod[!is.na(yMod)])
obsdat <- mean(obsdat)
}
iex <- which(!is.na(ModVar))
ModVar <- ModVar[iex]
obsdat <- obsdat[iex]
xDat <- xDat[iex]
if (ierr > 0) {
Err <- obs[iDat, ierr]
Err <- Err[iex]
} else {
if (weight == "std")
Err <- sd(obsdat)
else if (weight == "mean")
Err <- mean(abs(obsdat))
else if (weight == "none")
Err <- 1
else
stop("error: do not recognize 'weight'; should be one of 'none', 'std', 'mean'")
}
if (any(is.na(Err)))
stop(paste("error: cannot estimate weighing for observed variable: ", Names[i]))
if (min(Err) <= 0)
stop(paste("error: weighing for observed variable is 0 or negative:", Names[i]))
if (scaleVar)
Scale <- 1/length(obsdat)
else Scale <- 1
if(!modelCalculatedFully){ # In this case, the model is unable to predict on the full range, set cost to Inf
xDat <- 0
obsdat <- 0
ModVar <- Inf
Res <- Inf
res <- Inf
weight_for_residual <- Inf
} else{
Res <- (ModVar- obsdat)
res <- Res / Err
weight_for_residual <- 1 / Err
}
resScaled <- res * Scale
Residual <- rbind(Residual,
data.frame(
name = Names[i],
x = xDat,
obs = obsdat,
mod = ModVar,
weight = weight_for_residual,
res.unweighted = Res,
res = res))
CostVar <- rbind(CostVar,
data.frame(
name = Names[i],
scale = Scale,
N = length(Res),
SSR.unweighted = sum(Res^2),
SSR.unscaled = sum(res^2),
SSR = sum(resScaled^2)))
} # end loop over all observed variables
## SSR
Cost <- sum(CostVar$SSR * CostVar$scale)
Lprob <- -sum(log(pmax(0, dnorm(Residual$mod, Residual$obs, Err)))) # avoid log of negative values
#Lprob <- -sum(log(pmax(.Machine$double.xmin, dnorm(Residual$mod, Residual$obs, Err)))) #avoid log(0)
if (! is.null(cost)) {
Cost <- Cost + cost$model
CostVar <- rbind(CostVar, cost$var)
Residual <- rbind(Residual, cost$residuals)
Lprob <- Lprob + cost$minlogp
}
out <- list(model = Cost, cost = Cost, minlogp = Lprob, var = CostVar, residuals = Residual)
class(out) <- "modCost"
return(out)
}
## -----------------------------------------------------------------------------
## S3 methods of modCost
## -----------------------------------------------------------------------------
plot.modCost<- function(x, legpos="topleft", ...) {
nvar <- nrow(x$var)
dots <- list(...)
dots$xlab <- if(is.null(dots$xlab)) "x" else dots$xlab
dots$ylab <- if(is.null(dots$ylab)) "weighted residuals" else dots$ylab
DotsPch <- if(is.null(dots$pch)) (16:24) else dots$pch
dots$pch <- if(is.null(dots$pch)) (16:24)[x$residuals$name] else dots$pch[x$residuals$name]
DotsCol <- if(is.null(dots$col)) (1:nvar) else dots$col
dots$col <- if(is.null(dots$col)) (1:nvar)[x$residuals$name] else dots$col[x$residuals$name]
do.call("plot", c(alist(x$residuals$x, x$residuals$res), dots))
# plot(x$residuals$x, x$residuals$res, xlab="x", ylab="weighted residuals",
# pch=c(16:24)[x$residuals$name],col=c(1:nvar)[x$residuals$name],...)
if (! is.na(legpos))
legend(legpos, legend = x$var$name, col = DotsCol, pch = DotsPch)
}
## -----------------------------------------------------------------------------
## Internal cost function for optimisers
## -----------------------------------------------------------------------------
# Cost function. The returned structure must have $model
# We need to preserve state between calls so make a closure
CakeInternalCostFunctions <- function(mkinmod, state.ini.optim, state.ini.optim.boxnames,
state.ini.fixed, parms.fixed, observed, mkindiff,
quiet, atol=1e-6, solution="deSolve", err="err"){
cost.old <- 1e+100
calls <- 0
out_predicted <- NA
get.predicted <- function(){ out_predicted }
get.best.cost <- function(){ cost.old }
reset.best.cost <- function() { cost.old<<-1e+100 }
get.calls <- function(){ calls }
set.calls <- function(newcalls){ calls <<- newcalls }
set.error<-function(err) { observed$err <<- err }
# The called cost function
cost <- function(P) {
assign("calls", calls + 1, inherits = TRUE)
print(P)
if (length(state.ini.optim) > 0) {
odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed)
names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed))
} else {
odeini <- state.ini.fixed
}
odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed)
# Ensure initial state is at time 0
outtimes = unique(c(0,observed$time))
odeini <- AdjustOdeInitialValues(odeini, mkinmod, odeparms)
if (solution == "analytical") {
evalparse <- function(string)
{
eval(parse(text=string), as.list(c(odeparms, odeini)))
}
parent.type = names(mkinmod$map[[1]])[1]
parent.name = names(mkinmod$diffs)[[1]]
o <- switch(parent.type,
SFO = SFO.solution(outtimes,
evalparse(parent.name),
evalparse(paste("k", parent.name, sep="_"))),
FOMC = FOMC.solution(outtimes,
evalparse(parent.name),
evalparse("alpha"), evalparse("beta")),
DFOP = DFOP.solution(outtimes,
evalparse(parent.name),
evalparse(paste("k1", parent.name, sep="_")),
evalparse(paste("k2", parent.name, sep="_")),
evalparse(paste("g", parent.name, sep="_"))),
HS = HS.solution(outtimes,
evalparse(parent.name),
evalparse("k1"), evalparse("k2"),
evalparse("tb")),
IORE = IORE.solution(outtimes,
evalparse(parent.name),
evalparse(paste("k", parent.name, sep="_")),
evalparse("N")))
out <- cbind(outtimes, o)
dimnames(out) <- list(outtimes, c("time", sub("_free", "", parent.name)))
}
if (solution == "deSolve")
{
out <- ode(y = odeini, times = outtimes, func = mkindiff, parms = odeparms, atol = atol)
}
out_transformed <- PostProcessOdeOutput(out, mkinmod, atol)
assign("out_predicted", out_transformed, inherits = TRUE)
mC <- CakeCost(out_transformed, observed, y = "value", err = err, scaleVar = FALSE)
mC$penalties <- CakePenalties(odeparms, out_transformed, observed)
mC$model <- mC$cost + mC$penalties
if (mC$model < cost.old) {
if (!quiet) {
cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, "\n")
}
assign("cost.old", mC$model, inherits = TRUE)
}
# HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost
if(mC$penalties > 0){
mC$residuals$res <- mC$residuals$res + (sign(mC$residuals$res) * mC$penalties / length(mC$residuals$res))
}
return(mC)
}
list(cost=cost,
get.predicted=get.predicted,
get.calls=get.calls, set.calls=set.calls,
get.best.cost=get.best.cost, reset.best.cost=reset.best.cost,
set.error=set.error
)
}