From 5b3daf393831acc4099e1bde3fe4527993529d74 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 18 Oct 2017 11:28:39 +0200 Subject: Version 3.2 --- CakeSummary.R | 330 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 200 insertions(+), 130 deletions(-) (limited to 'CakeSummary.R') diff --git a/CakeSummary.R b/CakeSummary.R index 6c7c661..70f01e6 100644 --- a/CakeSummary.R +++ b/CakeSummary.R @@ -3,10 +3,10 @@ # and display the summary itself. # Parts modified from package mkin -# Parts Tessella (Rob Nelson/Richard Smith/Tamar Christina) for Syngenta: Copyright (C) 2011 Syngenta -# Tessella Project Reference: 6245, 7247 +# Parts Tessella for Syngenta: Copyright (C) 2011-2016 Syngenta +# Tessella Project Reference: 6245, 7247, 8361, 7414 -# This program is free software: you can redistribute it and/or modify +# The CAKE R modules are free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. @@ -17,27 +17,84 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program. If not, see .” +# along with this program. If not, see . # Calculate the extra decay times for the other k values in DFOP # and HS # Arguments # type - type of compartment # parms.all - list of parameters -CakeExtraDT<-function(type, parms.all) { +CakeExtraDT<-function(type, obs_var, parms.all) { DT50k1 = NA - DT50k2 = NA - if (!is.na(parms.all["k1"]) && !is.na(parms.all["k2"])) { - k1 = parms.all["k1"] - k2 = parms.all["k2"] - DT50k1 = log(2)/k1 - DT50k2 = log(2)/k2 + DT50k2 = NA + if (type == "DFOP"){ + k1_name = paste("k1", obs_var, sep="_") + k2_name = paste("k2", obs_var, sep="_") + + if (!is.na(parms.all[k1_name]) && !is.na(parms.all[k2_name])) { + k1 = parms.all[k1_name] + k2 = parms.all[k2_name] + DT50k1 = log(2)/k1 + DT50k2 = log(2)/k2 + } } + + if (type == "HS"){ + if (!is.na(parms.all["k1"]) && !is.na(parms.all["k2"])) { + k1 = parms.all["k1"] + k2 = parms.all["k2"] + DT50k1 = log(2)/k1 + DT50k2 = log(2)/k2 + } + } + ret<-c(DT50k1, DT50k2) names(ret)<-c("k1", "k2") ret } +# Calculate the representative decay time for an IORE model. +# Arguments +# obs_var - compartment name +# parms.all - list of parameters +CakeIORERepresentativeDT<-function(obs_var, parms.all) { + repDT = NA + + if ("N" %in% names(parms.all)){ + k_name = paste("k", obs_var, sep="_") + k_tot = parms.all[[k_name]] + Nval = parms.all[["N"]] + M0_name = paste(obs_var, "0", sep="_") + + if (!(M0_name %in% names(parms.all))){ + M0_name = obs_var + } + + M0val = parms.all[[M0_name]] + if (Nval == 1){ + repDT = log(2)/k_tot + } + else{ + repDT = (log(2)/log(10)) * ((10^(Nval - 1) - 1) * M0val^(1 - Nval))/((Nval - 1) * k_tot) + } + } + + repDT +} + +# Calculate the "back-calculated" DT50 for an FOMC model. +# Arguments +# parms.all - list of parameters +CakeFOMCBackCalculatedDT<-function(parms.all) { + repDT = NA + + if ("alpha" %in% names(parms.all) && "beta" %in% names(parms.all)){ + repDT = parms.all[["beta"]] * (10^(1/parms.all[["alpha"]]) - 1) * (log(2)/log(10)) + } + + repDT +} + # Calculate the decay times for a compartment # Arguments # type - type of compartment @@ -46,8 +103,6 @@ CakeExtraDT<-function(type, parms.all) { # sannMaxIter - the maximum amount of iterations to do for SANN CakeDT<-function(type, obs_var, parms.all, sannMaxIter) { if (type == "SFO") { - # Changed to new parameterisation - #k_names = grep(paste("k", obs_var, sep="_"), names(parms.all), value=TRUE) k_name = paste("k", obs_var, sep="_") k_tot = parms.all[k_name] DT50 = log(2)/k_tot @@ -60,24 +115,56 @@ CakeDT<-function(type, obs_var, parms.all, sannMaxIter) { DT90 = beta * (10^(1/alpha) - 1) } if (type == "DFOP") { - k1 = parms.all["k1"] - k2 = parms.all["k2"] - g = parms.all["g"] + k1 = parms.all[paste("k1", obs_var, sep="_")] + k2 = parms.all[paste("k2", obs_var, sep="_")] + g = parms.all[paste("g", obs_var, sep="_")] f <- function(t, x) { fraction <- g * exp( - k1 * t) + (1 - g) * exp( - k2 * t) (fraction - (1 - x/100))^2 } DTminBounds <- 0.001 - DTminInitial <- 1 - DT50.temp <- optim(DTminInitial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 50, lower = DTminBounds) - DT50.o = DT50.temp$par - DT50.converged = DT50.temp$convergence == 0 - DT50 = ifelse(!DT50.converged, NA, DT50.o) - DT90.temp <- optim(DTminInitial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 90, lower = DTminBounds) - DT90.o = DT90.temp$par - DT90.converged = DT90.temp$convergence == 0 - DT90 = ifelse(!DT90.converged, NA, DT90.o) + + # Determine a decent starting point for numerical iteration. The latter two terms are also lower bounds for DT50. + DT50min <- max(0.001, (1 / k1) * log(g * 2), (1 / k2) * log((1 - g) * 2)) + + # Set the starting point for the numerical solver to a be a bit smaller than the computed minimum as the R optim function seems to get confused if the min is + # greater than the answer it's trying to converge to (up to its level of accuracy). + DT50Initial <- DT50min * 0.9 + + # Results greater than 10,000 are not interesting. The R optim routine also handles very large values unreliably and can claim to converge when it is nowhere near. + if (DT50min > 10000){ + DT50 = ">10,000" + }else{ + DT50.temp <- optim(DT50Initial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 50, lower = DTminBounds) + DT50.o = DT50.temp$par + DT50.converged = DT50.temp$convergence == 0 + DT50 = ifelse(!DT50.converged || DT50.o <= 0, NA, DT50.o) + + if (DT50.converged && DT50 > 10000){ + DT50 = ">10,000" + } + } + + # Determine a decent starting point for numerical iteration. The latter two terms are also lower bounds for DT90. + DT90min <- max(0.001, (1 / k1) * log(g * 10), (1 / k2) * log((1 - g) * 10)) + + # Set the starting point for the numerical solver to a be a bit smaller than the computed minimum as the R optim function seems to get confused if the min is + # greater than the answer it's trying to converge to (up to its level of accuracy). + DT90Initial <- DT90min * 0.9 + + if (DT90min > 10000){ + DT90 = ">10,000" + }else{ + DT90.temp <- optim(DT90Initial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 90, lower = DTminBounds) + DT90.o = DT90.temp$par + DT90.converged = DT90.temp$convergence == 0 + DT90 = ifelse(!DT90.converged || DT90.o <= 0, NA, DT90.o) + + if (DT90.converged && DT90 > 10000){ + DT90 = ">10,000" + } + } } if (type == "HS") { k1 = parms.all["k1"] @@ -93,34 +180,28 @@ CakeDT<-function(type, obs_var, parms.all, sannMaxIter) { DT50 <- DTx(50) DT90 <- DTx(90) } - if (type == "SFORB") { - # FOCUS kinetics (2006), p. 60 f - k_out_names = grep(paste("k", obs_var, "free", sep="_"), names(parms.all), value=TRUE) - k_out_names = setdiff(k_out_names, paste("k", obs_var, "free", "bound", sep="_")) - k_1output = sum(parms.all[k_out_names]) - k_12 = parms.all[paste("k", obs_var, "free", "bound", sep="_")] - k_21 = parms.all[paste("k", obs_var, "bound", "free", sep="_")] - - sqrt_exp = sqrt(1/4 * (k_12 + k_21 + k_1output)^2 + k_12 * k_21 - (k_12 + k_1output) * k_21) - b1 = 0.5 * (k_12 + k_21 + k_1output) + sqrt_exp - b2 = 0.5 * (k_12 + k_21 + k_1output) - sqrt_exp - - SFORB_fraction = function(t) { - ((k_12 + k_21 - b1)/(b2 - b1)) * exp(-b1 * t) + - ((k_12 + k_21 - b2)/(b1 - b2)) * exp(-b2 * t) - } - f_50 <- function(t) (SFORB_fraction(t) - 0.5)^2 - DTmin <- 0.001 - DT50.temp <- optim(DTmin, f_50, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 50, lower = DTmin) - DT50.o = DT50.temp$par - DT50.converged = DT50.temp$convergence == 0 - if (!DT50.converged) DT50 = NA else DT50 = DT50.o - f_90 <- function(t) (SFORB_fraction(t) - 0.1)^2 - DT90.temp <- optim(DTmin, f_90, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 90, lower = DTmin) - DT90.o = DT90.temp$par - DT90.converged = DT90.temp$convergence == 0 - if (!DT90.converged) DT90 = NA else DT90 = DT90.o + if (type == "IORE") { + k_name = paste("k", obs_var, sep="_") + k_tot = parms.all[k_name] + Nval = parms.all["N"] + M0_name = paste(obs_var, "0", sep="_") + + if (!(M0_name %in% names(parms.all))){ + M0_name = obs_var + } + + M0val = parms.all[M0_name] + + if (Nval == 1){ + DT50 = log(2)/k_tot + DT90 = log(10)/k_tot + } + else{ + DT50 = ((2^(Nval - 1) - 1) * M0val^(1 - Nval))/((Nval - 1) * k_tot) + DT90 = ((10^(Nval - 1) - 1) * M0val^(1 - Nval))/((Nval - 1) * k_tot) + } } + ret<-c(DT50, DT90) names(ret)<-c("DT50","DT90") ret @@ -133,68 +214,43 @@ CakeDT<-function(type, obs_var, parms.all, sannMaxIter) { # obs_vars - compartment names # parms.optim - list of fitted parameters # state,ini.optim - list of fitted initial values +# fixed - parameters that were fixed (taken from fit$fixed usually) # -CakeChi2<-function(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini) { +CakeChi2<-function(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fixed) { # Calculate chi2 error levels according to FOCUS (2006) - if(getRversion() >= '2.14.0'){ # You would usually call mkinfit to fit the data, however it seems that we've already fit the data # bundle <- mkinfit(mkinmod, observed, parms.ini = parms.optim, state.ini = state.ini) #, err='err') # Somewhere else. Calling mkinfit may result in an error during fitting. So Instead we just create - # the values that the new version of mkinerrmin expects. There is no class check so it's fine. + # the values that the new version of CakeErrMin expects. There is no class check so it's fine. bundle<-vector() bundle$predicted <- predicted_long bundle$observed <- observed bundle$obs_vars <- obs_vars bundle$par <- c(parms.optim, state.ini.optim) - errmin.overall <- mkinerrmin(bundle) + bundle$fixed <- fixed + errmin.overall <- CakeErrMin(bundle) errmin.overall - } else { - means <- aggregate(value ~ time + name, data = observed, mean, na.rm=TRUE) - errdata <- merge(means, predicted_long, by = c("time", "name"), suffixes = c("_mean", "_pred")) - errdata <- errdata[order(errdata$time, errdata$name), ] - errmin.overall <- mkinerrmin(errdata, length(parms.optim) + length(state.ini.optim)) - - errmin <- data.frame(err.min = errmin.overall$err.min, - n.optim = errmin.overall$n.optim, df = errmin.overall$df) - rownames(errmin) <- "All data" - for (obs_var in obs_vars) - { - errdata.var <- subset(errdata, name == obs_var) - n.k.optim <- length(grep(paste("k", obs_var, sep="_"), names(parms.optim))) - n.initials.optim <- length(grep(paste(obs_var, ".*", "_0", sep=""), names(state.ini.optim))) - n.optim <- n.k.optim + n.initials.optim - if ("alpha" %in% names(parms.optim)) n.optim <- n.optim + 1 - if ("beta" %in% names(parms.optim)) n.optim <- n.optim + 1 - if ("k1" %in% names(parms.optim)) n.optim <- n.optim + 1 - if ("k2" %in% names(parms.optim)) n.optim <- n.optim + 1 - if ("g" %in% names(parms.optim)) n.optim <- n.optim + 1 - if ("tb" %in% names(parms.optim)) n.optim <- n.optim + 1 - errmin.tmp <- mkinerrmin(errdata.var, n.optim) - errmin[obs_var, c("err.min", "n.optim", "df")] <- errmin.tmp - } - errmin - } } -# Compute additional statistics (r²/efficiency) of obs v pred for each compartment and the whole problem +# Compute additional statistics (r?/efficiency) of obs v pred for each compartment and the whole problem CakeAdditionalStats<-function(obsvspred){ - agg<-aggregate(obsvspred[c('observed', 'err-var', 'predicted', 'residual')], list(name=obsvspred$variable), function(x){x}, simplify=FALSE) - frames<-apply(agg, 1, as.data.frame); - names(frames)<-agg$name; - frames$all<-obsvspred - - t(sapply(frames, function(frame){ - # This function takes a data frame for a single variable and makes the stats for it - # r²: FOCUS eq 6.5 p. 97 - # Efficiency: eq 6.6 p. 99 - do <- frame$observed - mean(frame$observed) - dp <- frame$predicted - mean(frame$predicted) - r2 <- ( sum(do * dp) / sqrt(sum(do^2) * sum(dp^2)) ) ^ 2 - eff <- 1 - ( sum((frame$predicted - frame$observed)^2) / sum(do^2) ) - list(r2=r2, eff=eff) - })) + agg<-aggregate(obsvspred[c('observed', 'err-var', 'predicted', 'residual')], list(name=obsvspred$variable), function(x){x}, simplify=FALSE) + frames<-apply(agg, 1, as.data.frame); + names(frames)<-agg$name; + frames$all<-obsvspred + + t(sapply(frames, function(frame){ + # This function takes a data frame for a single variable and makes the stats for it + # r?: FOCUS eq 6.5 p. 97 + # Efficiency: eq 6.6 p. 99 + do <- frame$observed - mean(frame$observed) + dp <- frame$predicted - mean(frame$predicted) + r2 <- ( sum(do * dp) / sqrt(sum(do^2) * sum(dp^2)) ) ^ 2 + eff <- 1 - ( sum((frame$predicted - frame$observed)^2) / sum(do^2) ) + list(r2=r2, eff=eff) + })) } # Summarise a fit - used for CakeIrlsFit and CakeOlsFit @@ -202,22 +258,33 @@ summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TR param <- object$par pnames <- names(param) p <- length(param) - covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance - # covar <- try(ginv(0.5*object$hessian)) # unscaled covariance, calculate using generic inversion + + # If the Hessian comes back as the identity matrix, there are no residuals and so we can't provide statistics + if(object$hessian==diag(dim(object$hessian)[1])) { + covar=NULL + } else { + covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance + } rdf <- object$df.residual + + compartmentsFixedConcentration <- gsub("_0$", "", rownames(subset(object$fixed, type=="state"))) + + # As for chi squared values, observations at time 0 should not count towards number of degrees of freedom if the corresponding + # initial concentration is 0. + for (compartment in compartmentsFixedConcentration){ + timeZeroObservations <- subset(object$data, time==0 & variable==compartment) + + if (length(row.names(timeZeroObservations)) > 0){ + rdf <- rdf - length(row.names(timeZeroObservations)) + } + } + resvar <- object$ssr / rdf modVariance <- object$ssr / length(object$residuals) - - # if(!is.numeric(covar)){ - # covar <- try(ginv(0.5*object$hessian)) - # } if (!is.numeric(covar)) { -# message <- "Cannot estimate covariance; system is singular" -# warning(message) -# covar <- matrix(data = NA, nrow = p, ncol = p) - covar=NULL + covar=NULL } else { message <- "ok" rownames(covar) <- colnames(covar) <-pnames @@ -233,7 +300,6 @@ summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TR } else message <- "ok" if(cov && !is.null(covar)) { - # Filter the values for t-test, only apply t-test to k-values t.names <- grep("k(\\d+)|k_(.*)", names(tval), value = TRUE) t.rest <- setdiff(names(tval), t.names) @@ -241,19 +307,15 @@ summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TR t.values[t.rest] <- NA t.result <- pt(t.values, rdf, lower.tail = FALSE) - # Now set the values we're not interested in for the lower - # and upper bound we're not interested in to NA - t.param = c(param) - t.param[t.names] <- NA # calculate the 90% confidence interval alpha <- 0.10 - lci90 <- t.param + qt(alpha/2,rdf)*se - uci90 <- t.param + qt(1-alpha/2,rdf)*se + lci90 <- c(param) + qt(alpha/2,rdf)*se + uci90 <- c(param) + qt(1-alpha/2,rdf)*se # calculate the 95% confidence interval alpha <- 0.05 - lci95 <- t.param + qt(alpha/2,rdf)*se - uci95 <- t.param + qt(1-alpha/2,rdf)*se + lci95 <- c(param) + qt(alpha/2,rdf)*se + uci95 <- c(param) + qt(1-alpha/2,rdf)*se param <- cbind(param, se, tval, t.result, lci90, uci90, lci95, uci95) dimnames(param) <- list(pnames, c("Estimate", "Std. Error", @@ -282,15 +344,19 @@ summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TR ans$diffs <- object$diffs if(data) { - ans$data <- object$data - ans$additionalstats = CakeAdditionalStats(object$data) + ans$data <- object$data + ans$additionalstats = CakeAdditionalStats(object$data) } ans$start <- object$start ans$fixed <- object$fixed ans$errmin <- object$errmin ans$penalties <- object$penalties if(distimes) ans$distimes <- object$distimes - if(halflives) ans$extraDT50 <- object$extraDT50 + if(halflives){ + ans$extraDT50 <- object$extraDT50 + ans$ioreRepDT <- object$ioreRepDT + ans$fomcRepDT <- object$fomcRepDT + } if(ff) ans$ff <- object$ff if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.cakeModFit") @@ -334,20 +400,24 @@ print.summary.CakeFit <- function(x, digits = max(3, getOption("digits") - 3), . if(printextraDT50){ cat("\nHalf Lives for Extra k values:\n") print(x$extraDT50, digits=digits,...) + } + + printIoreRepDT <- !is.null(x$ioreRepDT) + if (printIoreRepDT){ + cat("\nRepresentative Half Life for IORE:", format(signif(x$ioreRepDT, digits))) } + printFomcRepDT <- !is.null(x$fomcRepDT) + if (printFomcRepDT){ + cat("\nBack-calculated Half Life for FOMC:", format(signif(x$fomcRepDT, digits))) + } + printff <- !is.null(x$ff) if(printff){ cat("\nEstimated formation fractions:\n") print(data.frame(ff = x$ff), digits=digits,...) } - printSFORB <- !is.null(x$SFORB) - if(printSFORB){ - cat("\nEstimated Eigenvalues of SFORB model(s):\n") - print(x$SFORB, digits=digits,...) - } - printcor <- !is.null(x$cov.unscaled) if (printcor){ Corr <- cov2cor(x$cov.unscaled) @@ -363,13 +433,13 @@ print.summary.CakeFit <- function(x, digits = max(3, getOption("digits") - 3), . } if(!is.null(x$additionalstats)){ - cat("\nAdditional Stats:\n"); - print(x$additionalstats, digits=digits, ...) + cat("\nAdditional Stats:\n"); + print(x$additionalstats, digits=digits, ...) } if(!(is.null(x$penalties) || 0 == dim(x$penalties)[[1]])){ - cat("\nPenalties:\n"); - print(x$penalties, digits=digits, row.names = FALSE, ...) + cat("\nPenalties:\n"); + print(x$penalties, digits=digits, row.names = FALSE, ...) } if ( !is.null(x$seed) ) { -- cgit v1.2.1