# $Id$
# CakeSummary: Functions to calculate statistics used in the summary,
# and display the summary itself.
# Parts modified from package mkin
# Parts Tessella (Rob Nelson/Richard Smith/Tamar Christina) for Syngenta: Copyright (C) 2011 Syngenta
# Tessella Project Reference: 6245, 7247
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.”
# Calculate the extra decay times for the other k values in DFOP
# and HS
# Arguments
# type - type of compartment
# parms.all - list of parameters
CakeExtraDT<-function(type, parms.all) {
DT50k1 = NA
DT50k2 = NA
if (!is.na(parms.all["k1"]) && !is.na(parms.all["k2"])) {
k1 = parms.all["k1"]
k2 = parms.all["k2"]
DT50k1 = log(2)/k1
DT50k2 = log(2)/k2
}
ret<-c(DT50k1, DT50k2)
names(ret)<-c("k1", "k2")
ret
}
# Calculate the decay times for a compartment
# Arguments
# type - type of compartment
# obs_var - compartment name
# parms.all - list of parameters
# sannMaxIter - the maximum amount of iterations to do for SANN
CakeDT<-function(type, obs_var, parms.all, sannMaxIter) {
if (type == "SFO") {
# Changed to new parameterisation
#k_names = grep(paste("k", obs_var, sep="_"), names(parms.all), value=TRUE)
k_name = paste("k", obs_var, sep="_")
k_tot = parms.all[k_name]
DT50 = log(2)/k_tot
DT90 = log(10)/k_tot
}
if (type == "FOMC") {
alpha = parms.all["alpha"]
beta = parms.all["beta"]
DT50 = beta * (2^(1/alpha) - 1)
DT90 = beta * (10^(1/alpha) - 1)
}
if (type == "DFOP") {
k1 = parms.all["k1"]
k2 = parms.all["k2"]
g = parms.all["g"]
f <- function(t, x) {
fraction <- g * exp( - k1 * t) + (1 - g) * exp( - k2 * t)
(fraction - (1 - x/100))^2
}
DTminBounds <- 0.001
DTminInitial <- 1
DT50.temp <- optim(DTminInitial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 50, lower = DTminBounds)
DT50.o = DT50.temp$par
DT50.converged = DT50.temp$convergence == 0
DT50 = ifelse(!DT50.converged, NA, DT50.o)
DT90.temp <- optim(DTminInitial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 90, lower = DTminBounds)
DT90.o = DT90.temp$par
DT90.converged = DT90.temp$convergence == 0
DT90 = ifelse(!DT90.converged, NA, DT90.o)
}
if (type == "HS") {
k1 = parms.all["k1"]
k2 = parms.all["k2"]
tb = parms.all["tb"]
DTx <- function(x) {
DTx.a <- (log(100/(100 - x)))/k1
DTx.b <- tb + (log(100/(100 - x)) - k1 * tb)/k2
if (DTx.a < tb) DTx <- DTx.a
else DTx <- DTx.b
return(DTx)
}
DT50 <- DTx(50)
DT90 <- DTx(90)
}
if (type == "SFORB") {
# FOCUS kinetics (2006), p. 60 f
k_out_names = grep(paste("k", obs_var, "free", sep="_"), names(parms.all), value=TRUE)
k_out_names = setdiff(k_out_names, paste("k", obs_var, "free", "bound", sep="_"))
k_1output = sum(parms.all[k_out_names])
k_12 = parms.all[paste("k", obs_var, "free", "bound", sep="_")]
k_21 = parms.all[paste("k", obs_var, "bound", "free", sep="_")]
sqrt_exp = sqrt(1/4 * (k_12 + k_21 + k_1output)^2 + k_12 * k_21 - (k_12 + k_1output) * k_21)
b1 = 0.5 * (k_12 + k_21 + k_1output) + sqrt_exp
b2 = 0.5 * (k_12 + k_21 + k_1output) - sqrt_exp
SFORB_fraction = function(t) {
((k_12 + k_21 - b1)/(b2 - b1)) * exp(-b1 * t) +
((k_12 + k_21 - b2)/(b1 - b2)) * exp(-b2 * t)
}
f_50 <- function(t) (SFORB_fraction(t) - 0.5)^2
DTmin <- 0.001
DT50.temp <- optim(DTmin, f_50, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 50, lower = DTmin)
DT50.o = DT50.temp$par
DT50.converged = DT50.temp$convergence == 0
if (!DT50.converged) DT50 = NA else DT50 = DT50.o
f_90 <- function(t) (SFORB_fraction(t) - 0.1)^2
DT90.temp <- optim(DTmin, f_90, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 90, lower = DTmin)
DT90.o = DT90.temp$par
DT90.converged = DT90.temp$convergence == 0
if (!DT90.converged) DT90 = NA else DT90 = DT90.o
}
ret<-c(DT50, DT90)
names(ret)<-c("DT50","DT90")
ret
}
# Compute chi2 etc
# Arguments
# observed - data.frame of observed data
# predicted_long - fitted values etc
# obs_vars - compartment names
# parms.optim - list of fitted parameters
# state,ini.optim - list of fitted initial values
#
CakeChi2<-function(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini) {
# Calculate chi2 error levels according to FOCUS (2006)
if(getRversion() >= '2.14.0'){
# You would usually call mkinfit to fit the data, however it seems that we've already fit the data
# bundle <- mkinfit(mkinmod, observed, parms.ini = parms.optim, state.ini = state.ini) #, err='err')
# Somewhere else. Calling mkinfit may result in an error during fitting. So Instead we just create
# the values that the new version of mkinerrmin expects. There is no class check so it's fine.
bundle<-vector()
bundle$predicted <- predicted_long
bundle$observed <- observed
bundle$obs_vars <- obs_vars
bundle$par <- c(parms.optim, state.ini.optim)
errmin.overall <- mkinerrmin(bundle)
errmin.overall
} else {
means <- aggregate(value ~ time + name, data = observed, mean, na.rm=TRUE)
errdata <- merge(means, predicted_long, by = c("time", "name"), suffixes = c("_mean", "_pred"))
errdata <- errdata[order(errdata$time, errdata$name), ]
errmin.overall <- mkinerrmin(errdata, length(parms.optim) + length(state.ini.optim))
errmin <- data.frame(err.min = errmin.overall$err.min,
n.optim = errmin.overall$n.optim, df = errmin.overall$df)
rownames(errmin) <- "All data"
for (obs_var in obs_vars)
{
errdata.var <- subset(errdata, name == obs_var)
n.k.optim <- length(grep(paste("k", obs_var, sep="_"), names(parms.optim)))
n.initials.optim <- length(grep(paste(obs_var, ".*", "_0", sep=""), names(state.ini.optim)))
n.optim <- n.k.optim + n.initials.optim
if ("alpha" %in% names(parms.optim)) n.optim <- n.optim + 1
if ("beta" %in% names(parms.optim)) n.optim <- n.optim + 1
if ("k1" %in% names(parms.optim)) n.optim <- n.optim + 1
if ("k2" %in% names(parms.optim)) n.optim <- n.optim + 1
if ("g" %in% names(parms.optim)) n.optim <- n.optim + 1
if ("tb" %in% names(parms.optim)) n.optim <- n.optim + 1
errmin.tmp <- mkinerrmin(errdata.var, n.optim)
errmin[obs_var, c("err.min", "n.optim", "df")] <- errmin.tmp
}
errmin
}
}
# Compute additional statistics (r²/efficiency) of obs v pred for each compartment and the whole problem
CakeAdditionalStats<-function(obsvspred){
agg<-aggregate(obsvspred[c('observed', 'err-var', 'predicted', 'residual')], list(name=obsvspred$variable), function(x){x}, simplify=FALSE)
frames<-apply(agg, 1, as.data.frame);
names(frames)<-agg$name;
frames$all<-obsvspred
t(sapply(frames, function(frame){
# This function takes a data frame for a single variable and makes the stats for it
# r²: FOCUS eq 6.5 p. 97
# Efficiency: eq 6.6 p. 99
do <- frame$observed - mean(frame$observed)
dp <- frame$predicted - mean(frame$predicted)
r2 <- ( sum(do * dp) / sqrt(sum(do^2) * sum(dp^2)) ) ^ 2
eff <- 1 - ( sum((frame$predicted - frame$observed)^2) / sum(do^2) )
list(r2=r2, eff=eff)
}))
}
# Summarise a fit - used for CakeIrlsFit and CakeOlsFit
summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TRUE, ff = TRUE, cov = FALSE,...) {
param <- object$par
pnames <- names(param)
p <- length(param)
covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance
# covar <- try(ginv(0.5*object$hessian)) # unscaled covariance, calculate using generic inversion
rdf <- object$df.residual
resvar <- object$ssr / rdf
modVariance <- object$ssr / length(object$residuals)
# if(!is.numeric(covar)){
# covar <- try(ginv(0.5*object$hessian))
# }
if (!is.numeric(covar)) {
# message <- "Cannot estimate covariance; system is singular"
# warning(message)
# covar <- matrix(data = NA, nrow = p, ncol = p)
covar=NULL
} else {
message <- "ok"
rownames(covar) <- colnames(covar) <-pnames
se <- sqrt(diag(covar) * resvar)
names(se) <- pnames
tval <- param / se
}
if (!all(object$start$lower >=0)) {
message <- "Note that the one-sided t-test may not be appropriate if
parameter values below zero are possible."
warning(message)
} else message <- "ok"
if(cov && !is.null(covar)) {
# Filter the values for t-test, only apply t-test to k-values
t.names <- grep("k(\\d+)|k_(.*)", names(tval), value = TRUE)
t.rest <- setdiff(names(tval), t.names)
t.values <- c(tval)
t.values[t.rest] <- NA
t.result <- pt(t.values, rdf, lower.tail = FALSE)
# Now set the values we're not interested in for the lower
# and upper bound we're not interested in to NA
t.param = c(param)
t.param[t.names] <- NA
# calculate the 90% confidence interval
alpha <- 0.10
lci90 <- t.param + qt(alpha/2,rdf)*se
uci90 <- t.param + qt(1-alpha/2,rdf)*se
# calculate the 95% confidence interval
alpha <- 0.05
lci95 <- t.param + qt(alpha/2,rdf)*se
uci95 <- t.param + qt(1-alpha/2,rdf)*se
param <- cbind(param, se, tval, t.result, lci90, uci90, lci95, uci95)
dimnames(param) <- list(pnames, c("Estimate", "Std. Error",
"t value", "Pr(>t)", "Lower CI (90%)", "Upper CI (90%)", "Lower CI (95%)", "Upper CI (95%)"))
ans <- list(residuals = object$residuals,
residualVariance = resvar,
sigma = sqrt(resvar),
modVariance = modVariance,
df = c(p, rdf), cov.unscaled = covar,
cov.scaled = covar * resvar,
info = object$info, niter = object$iterations,
stopmess = message,
par = param)
} else {
param <- cbind(param)
ans <- list(residuals = object$residuals,
residualVariance = resvar,
sigma = sqrt(resvar),
modVariance = modVariance,
df = c(p, rdf),
info = object$info, niter = object$iterations,
stopmess = message,
par = param)
}
ans$diffs <- object$diffs
if(data) {
ans$data <- object$data
ans$additionalstats = CakeAdditionalStats(object$data)
}
ans$start <- object$start
ans$fixed <- object$fixed
ans$errmin <- object$errmin
ans$penalties <- object$penalties
if(distimes) ans$distimes <- object$distimes
if(halflives) ans$extraDT50 <- object$extraDT50
if(ff) ans$ff <- object$ff
if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB
class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.cakeModFit")
return(ans)
}
# Print a summary. Used for CakeIrlsFit, CakeOlsFit and CakeMcmcFit
# Expanded from print.summary.cakeModFit
print.summary.CakeFit <- function(x, digits = max(3, getOption("digits") - 3), ...) {
cat("\nEquations:\n")
print(noquote(as.character(x[["diffs"]])))
df <- x$df
rdf <- df[2]
cat("\nStarting values for optimised parameters:\n")
print(x$start)
cat("\nFixed parameter values:\n")
if(length(x$fixed$value) == 0) cat("None\n")
else print(x$fixed)
if ( !is.null(x$par) ) {
cat("\nOptimised parameters:\n")
printCoefmat(x$par, digits = digits, ...)
}
cat("\nResidual standard error:",
format(signif(x$sigma, digits)), "on", rdf, "degrees of freedom\n")
cat("\nChi2 error levels in percent:\n")
x$errmin$err.min <- 100 * x$errmin$err.min
print(x$errmin, digits=digits,...)
printdistimes <- !is.null(x$distimes)
if(printdistimes){
cat("\nEstimated disappearance times:\n")
print(x$distimes, digits=digits,...)
}
printextraDT50 <- !is.null(x$extraDT50)
if(printextraDT50){
cat("\nHalf Lives for Extra k values:\n")
print(x$extraDT50, digits=digits,...)
}
printff <- !is.null(x$ff)
if(printff){
cat("\nEstimated formation fractions:\n")
print(data.frame(ff = x$ff), digits=digits,...)
}
printSFORB <- !is.null(x$SFORB)
if(printSFORB){
cat("\nEstimated Eigenvalues of SFORB model(s):\n")
print(x$SFORB, digits=digits,...)
}
printcor <- !is.null(x$cov.unscaled)
if (printcor){
Corr <- cov2cor(x$cov.unscaled)
rownames(Corr) <- colnames(Corr) <- rownames(x$par)
cat("\nParameter correlation:\n")
print(Corr, digits = digits, ...)
}
printdata <- !is.null(x$data)
if (printdata){
cat("\nData:\n")
print(format(x$data, digits = digits, scientific = FALSE,...), row.names = FALSE)
}
if(!is.null(x$additionalstats)){
cat("\nAdditional Stats:\n");
print(x$additionalstats, digits=digits, ...)
}
if(!(is.null(x$penalties) || 0 == dim(x$penalties)[[1]])){
cat("\nPenalties:\n");
print(x$penalties, digits=digits, row.names = FALSE, ...)
}
if ( !is.null(x$seed) ) {
cat("\nRandom Seed:", x$seed, "\n")
}
invisible(x)
}