# $Id$ # CakeSummary: Functions to calculate statistics used in the summary, # and display the summary itself. # Parts modified from package mkin # Parts Hybrid Intelligence (formerly Tessella), part of Capgemini Engineering, # for Syngenta: Copyright (C) 2011-2022 Syngenta # Tessella Project Reference: 6245, 7247, 8361, 7414, 10091 # The CAKE R modules are free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # Calculate the extra decay times for the other k values in DFOP # and HS # Arguments # type - type of compartment # parms.all - list of parameters CakeExtraDT<-function(type, obs_var, parms.all) { DT50k1 = NA DT50k2 = NA if (type == "DFOP"){ k1_name = paste("k1", obs_var, sep="_") k2_name = paste("k2", obs_var, sep="_") if (!is.na(parms.all[k1_name]) && !is.na(parms.all[k2_name])) { k1 = parms.all[k1_name] k2 = parms.all[k2_name] DT50k1 = log(2)/k1 DT50k2 = log(2)/k2 } } if (type == "HS"){ if (!is.na(parms.all["k1"]) && !is.na(parms.all["k2"])) { k1 = parms.all["k1"] k2 = parms.all["k2"] DT50k1 = log(2)/k1 DT50k2 = log(2)/k2 } } ret<-c(DT50k1, DT50k2) names(ret)<-c("k1", "k2") ret } # Calculate the representative decay time for an IORE model. # Arguments # obs_var - compartment name # parms.all - list of parameters CakeIORERepresentativeDT<-function(obs_var, parms.all) { repDT = NA if ("N" %in% names(parms.all)){ k_name = paste("k", obs_var, sep="_") k_tot = parms.all[[k_name]] Nval = parms.all[["N"]] M0_name = paste(obs_var, "0", sep="_") if (!(M0_name %in% names(parms.all))){ M0_name = obs_var } M0val = parms.all[[M0_name]] if (Nval == 1){ repDT = log(2)/k_tot } else{ repDT = (log(2)/log(10)) * ((10^(Nval - 1) - 1) * M0val^(1 - Nval))/((Nval - 1) * k_tot) } } repDT } # Calculate the "back-calculated" DT50 for an FOMC model. # Arguments # parms.all - list of parameters CakeFOMCBackCalculatedDT<-function(parms.all) { repDT = NA if ("alpha" %in% names(parms.all) && "beta" %in% names(parms.all)){ repDT = parms.all[["beta"]] * (10^(1/parms.all[["alpha"]]) - 1) * (log(2)/log(10)) } repDT } # Calculate the decay times for a compartment # Arguments # type - type of compartment # obs_var - compartment name # parms.all - list of parameters # dfopDtMaxIter - the maximum amount of iterations to do for DFOP DT calculation CakeDT<-function(type, obs_var, parms.all, dfopDtMaxIter) { if (type == "SFO") { k_name = paste("k", obs_var, sep="_") k_tot = parms.all[k_name] DT50 = log(2)/k_tot DT90 = log(10)/k_tot } if (type == "FOMC") { alpha = parms.all["alpha"] beta = parms.all["beta"] DT50 = beta * (2^(1/alpha) - 1) DT90 = beta * (10^(1/alpha) - 1) } if (type == "DFOP") { k1 = parms.all[paste("k1", obs_var, sep="_")] k2 = parms.all[paste("k2", obs_var, sep="_")] g = parms.all[paste("g", obs_var, sep="_")] f <- function(t, x) { fraction <- g * exp( - k1 * t) + (1 - g) * exp( - k2 * t) (fraction - (1 - x/100))^2 } # Determine a decent starting point for numerical iteration. These are lower bounds for DT50. DT50min <- max((1 / k1) * log(g * 2), (1 / k2) * log((1 - g) * 2)) # Set the starting point for the numerical solver to a be a bit smaller than the computed minimum. DT50Initial <- DT50min * 0.9 # Results greater than 10,000 are not interesting. The R optim routine also handles very large values unreliably and can claim to converge when it is nowhere near. if (DT50min > 10000){ DT50 = ">10,000" }else{ DT50.temp <- optim(DT50Initial, f, method="Nelder-Mead", control=list(maxit=dfopDtMaxIter, abstol=1E-10), x = 50) DT50.o = DT50.temp$par DT50.converged = DT50.temp$convergence == 0 DT50 = ifelse(!DT50.converged || DT50.o <= 0, NA, DT50.o) if (DT50.converged && DT50 > 10000){ DT50 = ">10,000" } } # Determine a decent starting point for numerical iteration. These are lower bounds for DT90. DT90min <- max((1 / k1) * log(g * 10), (1 / k2) * log((1 - g) * 10)) # Set the starting point for the numerical solver to a be a bit smaller than the computed minimum. DT90Initial <- DT90min * 0.9 if (DT90min > 10000){ DT90 = ">10,000" }else{ DT90.temp <- optim(DT90Initial, f, method="Nelder-Mead", control=list(maxit=dfopDtMaxIter, abstol=1E-10), x = 90) DT90.o = DT90.temp$par DT90.converged = DT90.temp$convergence == 0 DT90 = ifelse(!DT90.converged || DT90.o <= 0, NA, DT90.o) if (DT90.converged && DT90 > 10000){ DT90 = ">10,000" } } } if (type == "HS") { k1 = parms.all["k1"] k2 = parms.all["k2"] tb = parms.all["tb"] DTx <- function(x) { DTx.a <- (log(100/(100 - x)))/k1 DTx.b <- tb + (log(100/(100 - x)) - k1 * tb)/k2 if (DTx.a < tb) DTx <- DTx.a else DTx <- DTx.b return(DTx) } DT50 <- DTx(50) DT90 <- DTx(90) } if (type == "IORE") { k_name = paste("k", obs_var, sep="_") k_tot = parms.all[k_name] Nval = parms.all["N"] M0_name = paste(obs_var, "0", sep="_") if (!(M0_name %in% names(parms.all))){ M0_name = obs_var } M0val = parms.all[M0_name] if (Nval == 1){ DT50 = log(2)/k_tot DT90 = log(10)/k_tot } else{ DT50 = ((2^(Nval - 1) - 1) * M0val^(1 - Nval))/((Nval - 1) * k_tot) DT90 = ((10^(Nval - 1) - 1) * M0val^(1 - Nval))/((Nval - 1) * k_tot) } } ret<-c(DT50, DT90) names(ret)<-c("DT50","DT90") ret } # Compute chi2 etc # Arguments # observed - data.frame of observed data # predicted_long - fitted values etc # obs_vars - compartment names # parms.optim - list of fitted parameters # state,ini.optim - list of fitted initial values # fixed - parameters that were fixed (taken from fit$fixed usually) # CakeChi2<-function(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fixed) { # Calculate chi2 error levels according to FOCUS (2006) # You would usually call mkinfit to fit the data, however it seems that we've already fit the data # bundle <- mkinfit(mkinmod, observed, parms.ini = parms.optim, state.ini = state.ini) #, err='err') # Somewhere else. Calling mkinfit may result in an error during fitting. So Instead we just create # the values that the new version of CakeErrMin expects. There is no class check so it's fine. bundle<-vector() bundle$predicted <- predicted_long bundle$observed <- observed bundle$obs_vars <- obs_vars bundle$par <- c(parms.optim, state.ini.optim) bundle$fixed <- fixed errmin.overall <- CakeErrMin(bundle) errmin.overall } # Compute additional statistics (r?/efficiency) of obs v pred for each compartment and the whole problem CakeAdditionalStats<-function(obsvspred){ agg<-aggregate(obsvspred[c('observed', 'err-var', 'predicted', 'residual')], list(name=obsvspred$variable), function(x){x}, simplify=FALSE) frames<-apply(agg, 1, as.data.frame); names(frames)<-agg$name; frames$all<-obsvspred t(sapply(frames, function(frame){ # This function takes a data frame for a single variable and makes the stats for it # r?: FOCUS eq 6.5 p. 97 # Efficiency: eq 6.6 p. 99 do <- frame$observed - mean(frame$observed) dp <- frame$predicted - mean(frame$predicted) r2 <- ( sum(do * dp) / sqrt(sum(do^2) * sum(dp^2)) ) ^ 2 eff <- 1 - ( sum((frame$predicted - frame$observed)^2) / sum(do^2) ) list(r2=r2, eff=eff) })) } # Summarise a fit - used for CakeIrlsFit and CakeOlsFit summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TRUE, ff = TRUE, cov = FALSE,...) { param <- object$par pnames <- names(param) p <- length(param) # If the Hessian comes back as the identity matrix, there are no residuals and so we can't provide statistics if(object$hessian==diag(dim(object$hessian)[1])) { covar=NULL } else { covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance } rdf <- object$df.residual compartmentsFixedConcentration <- gsub("_0$", "", rownames(subset(object$fixed, type=="state"))) # As for chi squared values, observations at time 0 should not count towards number of degrees of freedom if the corresponding # initial concentration is 0. for (compartment in compartmentsFixedConcentration){ timeZeroObservations <- subset(object$data, time==0 & variable==compartment) if (length(row.names(timeZeroObservations)) > 0){ rdf <- rdf - length(row.names(timeZeroObservations)) } } resvar <- object$ssr / rdf modVariance <- object$ssr / length(object$residuals) if (!is.numeric(covar)) { covar=NULL } else { message <- "ok" rownames(covar) <- colnames(covar) <-pnames se <- sqrt(diag(covar) * resvar) names(se) <- pnames tval <- param / se } if (!all(object$start$lower >=0)) { message <- "Note that the one-sided t-test may not be appropriate if parameter values below zero are possible." warning(message) } else message <- "ok" if(cov && !is.null(covar)) { # Filter the values for t-test, only apply t-test to k-values t.names <- grep("k(\\d+)|k_(.*)", names(tval), value = TRUE) t.rest <- setdiff(names(tval), t.names) t.values <- c(tval) t.values[t.rest] <- NA t.result <- pt(t.values, rdf, lower.tail = FALSE) # calculate the 90% confidence interval alpha <- 0.10 lci90 <- c(param) + qt(alpha/2,rdf)*se uci90 <- c(param) + qt(1-alpha/2,rdf)*se # calculate the 95% confidence interval alpha <- 0.05 lci95 <- c(param) + qt(alpha/2,rdf)*se uci95 <- c(param) + qt(1-alpha/2,rdf)*se param <- cbind(param, se, tval, t.result, lci90, uci90, lci95, uci95) dimnames(param) <- list(pnames, c("Estimate", "Std. Error", "t value", "Pr(>t)", "Lower CI (90%)", "Upper CI (90%)", "Lower CI (95%)", "Upper CI (95%)")) ans <- list(ssr = object$ssr, residuals = object$residuals, residualVariance = resvar, sigma = sqrt(resvar), modVariance = modVariance, df = c(p, rdf), cov.unscaled = covar, cov.scaled = covar * resvar, info = object$info, niter = object$iterations, stopmess = message, par = param) } else { param <- cbind(param) ans <- list(ssr = object$ssr, residuals = object$residuals, residualVariance = resvar, sigma = sqrt(resvar), modVariance = modVariance, df = c(p, rdf), info = object$info, niter = object$iterations, stopmess = message, par = param) } ans$diffs <- object$diffs if(data) { ans$data <- object$data ans$additionalstats = CakeAdditionalStats(object$data) } ans$start <- object$start ans$fixed <- object$fixed ans$errmin <- object$errmin ans$penalties <- object$penalties if(distimes) ans$distimes <- object$distimes if(halflives){ ans$extraDT50 <- object$extraDT50 ans$ioreRepDT <- object$ioreRepDT ans$fomcRepDT <- object$fomcRepDT } if(ff) ans$ff <- object$ff if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.cakeModFit") return(ans) } # Print a summary. Used for CakeIrlsFit, CakeOlsFit and CakeMcmcFit # Expanded from print.summary.cakeModFit print.summary.CakeFit <- function(x, digits = max(3, getOption("digits") - 3), ...) { cat("\nEquations:\n") print(noquote(as.character(x[["diffs"]]))) df <- x$df rdf <- df[2] cat("\nStarting values for optimised parameters:\n") print(x$start) cat("\nFixed parameter values:\n") if(length(x$fixed$value) == 0) cat("None\n") else print(x$fixed) if ( !is.null(x$par) ) { cat("\nOptimised parameters:\n") printCoefmat(x$par, digits = digits, ...) } cat("\nSum of squared residuals:", format(signif(x$ssr, digits))) cat("\nResidual standard error:", format(signif(x$sigma, digits)), "on", rdf, "degrees of freedom\n") cat("\nChi2 error levels in percent:\n") x$errmin$err.min <- 100 * x$errmin$err.min print(x$errmin, digits=digits,...) printdistimes <- !is.null(x$distimes) if(printdistimes){ cat("\nEstimated disappearance times:\n") print(x$distimes, digits=digits,...) } printextraDT50 <- !is.null(x$extraDT50) if(printextraDT50){ cat("\nHalf Lives for Extra k values:\n") print(x$extraDT50, digits=digits,...) } printIoreRepDT <- !is.null(x$ioreRepDT) if (printIoreRepDT){ cat("\nRepresentative Half Life for IORE:", format(signif(x$ioreRepDT, digits))) } printFomcRepDT <- !is.null(x$fomcRepDT) if (printFomcRepDT){ cat("\nBack-calculated Half Life for FOMC:", format(signif(x$fomcRepDT, digits))) } printff <- !is.null(x$ff) if(printff){ cat("\nEstimated formation fractions:\n") print(data.frame(ff = x$ff), digits=digits,...) } printcor <- !is.null(x$cov.unscaled) if (printcor){ Corr <- cov2cor(x$cov.unscaled) rownames(Corr) <- colnames(Corr) <- rownames(x$par) cat("\nParameter correlation:\n") print(Corr, digits = digits, ...) } printdata <- !is.null(x$data) if (printdata){ cat("\nData:\n") print(format(x$data, digits = digits, scientific = FALSE,...), row.names = FALSE) } if(!is.null(x$additionalstats)){ cat("\nAdditional Stats:\n"); print(x$additionalstats, digits=digits, ...) } if(!(is.null(x$penalties) || 0 == dim(x$penalties)[[1]])){ cat("\nPenalties:\n"); print(x$penalties, digits=digits, row.names = FALSE, ...) } if ( !is.null(x$seed) ) { cat("\nRandom Seed:", x$seed, "\n") } invisible(x) }