author    Johannes Ranke <jranke@uni-bremen.de>  2017-10-18 11:28:39 +0200
committer Johannes Ranke <jranke@uni-bremen.de>  2017-10-18 11:28:39 +0200
commit    5b3daf393831acc4099e1bde3fe4527993529d74 (patch)
tree      a742cb6df0498fced89a7020467b99ad98fda468 /CakeSummary.R
parent    3d6b4b4b8293a4a4ab6f06805e1380600373796c (diff)

Version 3.2 (tag: v3.2)

Diffstat (limited to 'CakeSummary.R'):
 -rw-r--r--  CakeSummary.R | 330
 1 file changed, 200 insertions(+), 130 deletions(-)
diff --git a/CakeSummary.R b/CakeSummary.R
index 6c7c661..70f01e6 100644
--- a/CakeSummary.R
+++ b/CakeSummary.R
@@ -3,10 +3,10 @@
# and display the summary itself.
# Parts modified from package mkin
-# Parts Tessella (Rob Nelson/Richard Smith/Tamar Christina) for Syngenta: Copyright (C) 2011 Syngenta
-# Tessella Project Reference: 6245, 7247
+# Parts Tessella for Syngenta: Copyright (C) 2011-2016 Syngenta
+# Tessella Project Reference: 6245, 7247, 8361, 7414
-# This program is free software: you can redistribute it and/or modify
+# The CAKE R modules are free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
@@ -17,27 +17,84 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.”
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Calculate the extra decay times for the other k values in DFOP
# and HS
# Arguments
# type - type of compartment
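+# obs_var - compartment name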
# parms.all - list of parameters
-CakeExtraDT<-function(type, parms.all) {
+CakeExtraDT<-function(type, obs_var, parms.all) {
DT50k1 = NA
- DT50k2 = NA
- if (!is.na(parms.all["k1"]) && !is.na(parms.all["k2"])) {
- k1 = parms.all["k1"]
- k2 = parms.all["k2"]
- DT50k1 = log(2)/k1
- DT50k2 = log(2)/k2
+ DT50k2 = NA
+ if (type == "DFOP"){
+ k1_name = paste("k1", obs_var, sep="_")
+ k2_name = paste("k2", obs_var, sep="_")
+
+ if (!is.na(parms.all[k1_name]) && !is.na(parms.all[k2_name])) {
+ k1 = parms.all[k1_name]
+ k2 = parms.all[k2_name]
+ DT50k1 = log(2)/k1
+ DT50k2 = log(2)/k2
+ }
}
+
+ if (type == "HS"){
+ if (!is.na(parms.all["k1"]) && !is.na(parms.all["k2"])) {
+ k1 = parms.all["k1"]
+ k2 = parms.all["k2"]
+ DT50k1 = log(2)/k1
+ DT50k2 = log(2)/k2
+ }
+ }
+
ret<-c(DT50k1, DT50k2)
names(ret)<-c("k1", "k2")
ret
}
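A quick illustration of the new per-compartment lookup (parameter names and values here are hypothetical; DFOP rates follow the k1_<compartment> naming used above):

  parms <- c(k1_parent = 0.10, k2_parent = 0.01, g_parent = 0.6)
  CakeExtraDT("DFOP", "parent", parms)
  # c(k1 = log(2)/0.10, k2 = log(2)/0.01): half-lives of about 6.93 and 69.3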
+# Calculate the representative decay time for an IORE model.
+# Arguments
+# obs_var - compartment name
+# parms.all - list of parameters
+CakeIORERepresentativeDT<-function(obs_var, parms.all) {
+ repDT = NA
+
+ if ("N" %in% names(parms.all)){
+ k_name = paste("k", obs_var, sep="_")
+ k_tot = parms.all[[k_name]]
+ Nval = parms.all[["N"]]
+ M0_name = paste(obs_var, "0", sep="_")
+
+ if (!(M0_name %in% names(parms.all))){
+ M0_name = obs_var
+ }
+
+ M0val = parms.all[[M0_name]]
+ if (Nval == 1){
+ repDT = log(2)/k_tot
+ }
+ else{
+ repDT = (log(2)/log(10)) * ((10^(Nval - 1) - 1) * M0val^(1 - Nval))/((Nval - 1) * k_tot)
+ }
+ }
+
+ repDT
+}
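A worked sketch with invented values: for N = 2, k_parent = 0.05 and an initial mass of 100, the representative DT50 is (log(2)/log(10)) * ((10^(N-1) - 1) * M0^(1-N)) / ((N-1) * k_tot):

  parms <- list(k_parent = 0.05, N = 2, parent_0 = 100)
  CakeIORERepresentativeDT("parent", parms)
  # (log(2)/log(10)) * ((10 - 1) / 100) / (1 * 0.05), about 0.54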
+
+# Calculate the "back-calculated" DT50 for an FOMC model.
+# Arguments
+# parms.all - list of parameters
+CakeFOMCBackCalculatedDT<-function(parms.all) {
+ repDT = NA
+
+ if ("alpha" %in% names(parms.all) && "beta" %in% names(parms.all)){
+ repDT = parms.all[["beta"]] * (10^(1/parms.all[["alpha"]]) - 1) * (log(2)/log(10))
+ }
+
+ repDT
+}
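Since the FOMC DT90 is beta * (10^(1/alpha) - 1) (as computed in CakeDT below), this back-calculated value is just DT90 * log(2)/log(10), i.e. DT90 divided by roughly 3.32. A check with invented parameters:

  parms <- list(alpha = 1.5, beta = 15)
  CakeFOMCBackCalculatedDT(parms)
  # 15 * (10^(1/1.5) - 1) * log(2)/log(10), about 16.4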
+
# Calculate the decay times for a compartment
# Arguments
# type - type of compartment
@@ -46,8 +103,6 @@ CakeExtraDT<-function(type, parms.all) {
# sannMaxIter - the maximum amount of iterations to do for SANN
CakeDT<-function(type, obs_var, parms.all, sannMaxIter) {
if (type == "SFO") {
- # Changed to new parameterisation
- #k_names = grep(paste("k", obs_var, sep="_"), names(parms.all), value=TRUE)
k_name = paste("k", obs_var, sep="_")
k_tot = parms.all[k_name]
DT50 = log(2)/k_tot
@@ -60,24 +115,56 @@ CakeDT<-function(type, obs_var, parms.all, sannMaxIter) {
DT90 = beta * (10^(1/alpha) - 1)
}
if (type == "DFOP") {
- k1 = parms.all["k1"]
- k2 = parms.all["k2"]
- g = parms.all["g"]
+ k1 = parms.all[paste("k1", obs_var, sep="_")]
+ k2 = parms.all[paste("k2", obs_var, sep="_")]
+ g = parms.all[paste("g", obs_var, sep="_")]
f <- function(t, x) {
fraction <- g * exp( - k1 * t) + (1 - g) * exp( - k2 * t)
(fraction - (1 - x/100))^2
}
DTminBounds <- 0.001
- DTminInitial <- 1
- DT50.temp <- optim(DTminInitial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 50, lower = DTminBounds)
- DT50.o = DT50.temp$par
- DT50.converged = DT50.temp$convergence == 0
- DT50 = ifelse(!DT50.converged, NA, DT50.o)
- DT90.temp <- optim(DTminInitial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 90, lower = DTminBounds)
- DT90.o = DT90.temp$par
- DT90.converged = DT90.temp$convergence == 0
- DT90 = ifelse(!DT90.converged, NA, DT90.o)
+
+ # Determine a decent starting point for numerical iteration. The latter two terms are also lower bounds for DT50.
+ DT50min <- max(0.001, (1 / k1) * log(g * 2), (1 / k2) * log((1 - g) * 2))
+
+ # Set the starting point for the numerical solver to be a bit smaller than the computed minimum, as the R optim function seems to get confused if the min is
+ # greater than the answer it's trying to converge to (up to its level of accuracy).
+ DT50Initial <- DT50min * 0.9
+
+ # Results greater than 10,000 are not of practical interest. The R optim routine also handles very large values unreliably and can claim to converge when it is nowhere near the true optimum.
+ if (DT50min > 10000){
+ DT50 = ">10,000"
+ }else{
+ DT50.temp <- optim(DT50Initial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 50, lower = DTminBounds)
+ DT50.o = DT50.temp$par
+ DT50.converged = DT50.temp$convergence == 0
+ DT50 = ifelse(!DT50.converged || DT50.o <= 0, NA, DT50.o)
+
+ # DT50 can be NA at this point (non-positive optimum), so guard against it before comparing
+ if (DT50.converged && !is.na(DT50) && DT50 > 10000){
+ DT50 = ">10,000"
+ }
+ }
+
+ # Determine a decent starting point for numerical iteration. The latter two terms are also lower bounds for DT90.
+ DT90min <- max(0.001, (1 / k1) * log(g * 10), (1 / k2) * log((1 - g) * 10))
+
+ # Set the starting point for the numerical solver to be a bit smaller than the computed minimum, as the R optim function seems to get confused if the min is
+ # greater than the answer it's trying to converge to (up to its level of accuracy).
+ DT90Initial <- DT90min * 0.9
+
+ if (DT90min > 10000){
+ DT90 = ">10,000"
+ }else{
+ DT90.temp <- optim(DT90Initial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 90, lower = DTminBounds)
+ DT90.o = DT90.temp$par
+ DT90.converged = DT90.temp$convergence == 0
+ DT90 = ifelse(!DT90.converged || DT90.o <= 0, NA, DT90.o)
+
+ # As above, DT90 can be NA here, so guard against it before comparing
+ if (DT90.converged && !is.na(DT90) && DT90 > 10000){
+ DT90 = ">10,000"
+ }
+ }
}
if (type == "HS") {
k1 = parms.all["k1"]
@@ -93,34 +180,28 @@ CakeDT<-function(type, obs_var, parms.all, sannMaxIter) {
DT50 <- DTx(50)
DT90 <- DTx(90)
}
- if (type == "SFORB") {
- # FOCUS kinetics (2006), p. 60 f
- k_out_names = grep(paste("k", obs_var, "free", sep="_"), names(parms.all), value=TRUE)
- k_out_names = setdiff(k_out_names, paste("k", obs_var, "free", "bound", sep="_"))
- k_1output = sum(parms.all[k_out_names])
- k_12 = parms.all[paste("k", obs_var, "free", "bound", sep="_")]
- k_21 = parms.all[paste("k", obs_var, "bound", "free", sep="_")]
-
- sqrt_exp = sqrt(1/4 * (k_12 + k_21 + k_1output)^2 + k_12 * k_21 - (k_12 + k_1output) * k_21)
- b1 = 0.5 * (k_12 + k_21 + k_1output) + sqrt_exp
- b2 = 0.5 * (k_12 + k_21 + k_1output) - sqrt_exp
-
- SFORB_fraction = function(t) {
- ((k_12 + k_21 - b1)/(b2 - b1)) * exp(-b1 * t) +
- ((k_12 + k_21 - b2)/(b1 - b2)) * exp(-b2 * t)
- }
- f_50 <- function(t) (SFORB_fraction(t) - 0.5)^2
- DTmin <- 0.001
- DT50.temp <- optim(DTmin, f_50, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 50, lower = DTmin)
- DT50.o = DT50.temp$par
- DT50.converged = DT50.temp$convergence == 0
- if (!DT50.converged) DT50 = NA else DT50 = DT50.o
- f_90 <- function(t) (SFORB_fraction(t) - 0.1)^2
- DT90.temp <- optim(DTmin, f_90, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 90, lower = DTmin)
- DT90.o = DT90.temp$par
- DT90.converged = DT90.temp$convergence == 0
- if (!DT90.converged) DT90 = NA else DT90 = DT90.o
+ if (type == "IORE") {
+ k_name = paste("k", obs_var, sep="_")
+ k_tot = parms.all[k_name]
+ Nval = parms.all["N"]
+ M0_name = paste(obs_var, "0", sep="_")
+
+ if (!(M0_name %in% names(parms.all))){
+ M0_name = obs_var
+ }
+
+ M0val = parms.all[M0_name]
+
+ if (Nval == 1){
+ DT50 = log(2)/k_tot
+ DT90 = log(10)/k_tot
+ }
+ else{
+ DT50 = ((2^(Nval - 1) - 1) * M0val^(1 - Nval))/((Nval - 1) * k_tot)
+ DT90 = ((10^(Nval - 1) - 1) * M0val^(1 - Nval))/((Nval - 1) * k_tot)
+ }
}
+
ret<-c(DT50, DT90)
names(ret)<-c("DT50","DT90")
ret
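The DFOP DT50min bound used above follows from dropping one exponential term: at the true DT50, g * exp(-k1 * t) <= 0.5, so t >= log(2 * g)/k1, and symmetrically for the k2 term. A quick check with invented rates:

  k1 <- 0.1; k2 <- 0.01; g <- 0.6
  f <- function(t) g * exp(-k1 * t) + (1 - g) * exp(-k2 * t)
  DT50min <- max(0.001, log(2 * g)/k1, log(2 * (1 - g))/k2)
  f(DT50min) >= 0.5  # TRUE: the true DT50 lies at or above DT50min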
@@ -133,68 +214,43 @@ CakeDT<-function(type, obs_var, parms.all, sannMaxIter) {
# obs_vars - compartment names
# parms.optim - list of fitted parameters
# state,ini.optim - list of fitted initial values
+# fixed - parameters that were fixed (taken from fit$fixed usually)
#
-CakeChi2<-function(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini) {
+CakeChi2<-function(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fixed) {
# Calculate chi2 error levels according to FOCUS (2006)
- if(getRversion() >= '2.14.0'){
# You would usually call mkinfit to fit the data, however it seems that we've already fit the data
# bundle <- mkinfit(mkinmod, observed, parms.ini = parms.optim, state.ini = state.ini) #, err='err')
# somewhere else. Calling mkinfit may result in an error during fitting, so instead we just create
- # the values that the new version of mkinerrmin expects. There is no class check so it's fine.
+ # the values that the new version of CakeErrMin expects. There is no class check so it's fine.
bundle<-vector()
bundle$predicted <- predicted_long
bundle$observed <- observed
bundle$obs_vars <- obs_vars
bundle$par <- c(parms.optim, state.ini.optim)
- errmin.overall <- mkinerrmin(bundle)
+ bundle$fixed <- fixed
+ errmin.overall <- CakeErrMin(bundle)
errmin.overall
- } else {
- means <- aggregate(value ~ time + name, data = observed, mean, na.rm=TRUE)
- errdata <- merge(means, predicted_long, by = c("time", "name"), suffixes = c("_mean", "_pred"))
- errdata <- errdata[order(errdata$time, errdata$name), ]
- errmin.overall <- mkinerrmin(errdata, length(parms.optim) + length(state.ini.optim))
-
- errmin <- data.frame(err.min = errmin.overall$err.min,
- n.optim = errmin.overall$n.optim, df = errmin.overall$df)
- rownames(errmin) <- "All data"
- for (obs_var in obs_vars)
- {
- errdata.var <- subset(errdata, name == obs_var)
- n.k.optim <- length(grep(paste("k", obs_var, sep="_"), names(parms.optim)))
- n.initials.optim <- length(grep(paste(obs_var, ".*", "_0", sep=""), names(state.ini.optim)))
- n.optim <- n.k.optim + n.initials.optim
- if ("alpha" %in% names(parms.optim)) n.optim <- n.optim + 1
- if ("beta" %in% names(parms.optim)) n.optim <- n.optim + 1
- if ("k1" %in% names(parms.optim)) n.optim <- n.optim + 1
- if ("k2" %in% names(parms.optim)) n.optim <- n.optim + 1
- if ("g" %in% names(parms.optim)) n.optim <- n.optim + 1
- if ("tb" %in% names(parms.optim)) n.optim <- n.optim + 1
- errmin.tmp <- mkinerrmin(errdata.var, n.optim)
- errmin[obs_var, c("err.min", "n.optim", "df")] <- errmin.tmp
- }
- errmin
- }
}
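CakeErrMin itself is not part of this diff; assuming it mirrors mkin's mkinerrmin, the FOCUS (2006) chi² error level is the smallest relative error at which the chi² test still passes. A sketch with invented data:

  obs  <- c(100, 74, 55, 41, 30)  # hypothetical mean observed values
  pred <- c(98, 75, 57, 40, 31)   # hypothetical fitted values
  df   <- length(obs) - 2         # with, say, two fitted parameters
  100 * sqrt(sum((obs - pred)^2) / (qchisq(0.95, df) * mean(obs)^2))
  # about 2.0 (percent)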
# Compute additional statistics (r²/efficiency) of obs v pred for each compartment and the whole problem
CakeAdditionalStats<-function(obsvspred){
- agg<-aggregate(obsvspred[c('observed', 'err-var', 'predicted', 'residual')], list(name=obsvspred$variable), function(x){x}, simplify=FALSE)
- frames<-apply(agg, 1, as.data.frame);
- names(frames)<-agg$name;
- frames$all<-obsvspred
-
- t(sapply(frames, function(frame){
- # This function takes a data frame for a single variable and makes the stats for it
- # r²: FOCUS eq 6.5 p. 97
- # Efficiency: eq 6.6 p. 99
- do <- frame$observed - mean(frame$observed)
- dp <- frame$predicted - mean(frame$predicted)
- r2 <- ( sum(do * dp) / sqrt(sum(do^2) * sum(dp^2)) ) ^ 2
- eff <- 1 - ( sum((frame$predicted - frame$observed)^2) / sum(do^2) )
- list(r2=r2, eff=eff)
- }))
+ agg<-aggregate(obsvspred[c('observed', 'err-var', 'predicted', 'residual')], list(name=obsvspred$variable), function(x){x}, simplify=FALSE)
+ frames<-apply(agg, 1, as.data.frame);
+ names(frames)<-agg$name;
+ frames$all<-obsvspred
+
+ t(sapply(frames, function(frame){
+ # This function takes a data frame for a single variable and makes the stats for it
+ # r²: FOCUS eq 6.5 p. 97
+ # Efficiency: eq 6.6 p. 99
+ do <- frame$observed - mean(frame$observed)
+ dp <- frame$predicted - mean(frame$predicted)
+ r2 <- ( sum(do * dp) / sqrt(sum(do^2) * sum(dp^2)) ) ^ 2
+ eff <- 1 - ( sum((frame$predicted - frame$observed)^2) / sum(do^2) )
+ list(r2=r2, eff=eff)
+ }))
}
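On the same pattern, a toy computation of both statistics (data invented):

  observed  <- c(100, 71, 50, 36, 25)
  predicted <- c(101, 70, 49, 34, 26)
  do <- observed - mean(observed)
  dp <- predicted - mean(predicted)
  (sum(do * dp) / sqrt(sum(do^2) * sum(dp^2)))^2  # r², FOCUS eq 6.5
  1 - sum((predicted - observed)^2) / sum(do^2)   # efficiency, eq 6.6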
# Summarise a fit - used for CakeIrlsFit and CakeOlsFit
@@ -202,22 +258,33 @@ summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TR
param <- object$par
pnames <- names(param)
p <- length(param)
- covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance
- # covar <- try(ginv(0.5*object$hessian)) # unscaled covariance, calculate using generic inversion
+
+ # If the Hessian comes back as the identity matrix, there are no residuals and so we can't provide statistics
+ # all() is needed here: '==' compares matrices elementwise
+ if(all(object$hessian == diag(dim(object$hessian)[1]))) {
+ covar=NULL
+ } else {
+ covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance
+ }
rdf <- object$df.residual
+
+ compartmentsFixedConcentration <- gsub("_0$", "", rownames(subset(object$fixed, type=="state")))
+
+ # As with the chi-squared values, observations at time 0 should not count towards the number of degrees of freedom if the corresponding
+ # initial concentration is 0.
+ for (compartment in compartmentsFixedConcentration){
+ timeZeroObservations <- subset(object$data, time==0 & variable==compartment)
+
+ if (length(row.names(timeZeroObservations)) > 0){
+ rdf <- rdf - length(row.names(timeZeroObservations))
+ }
+ }
+
resvar <- object$ssr / rdf
modVariance <- object$ssr / length(object$residuals)
-
- # if(!is.numeric(covar)){
- # covar <- try(ginv(0.5*object$hessian))
- # }
if (!is.numeric(covar)) {
-# message <- "Cannot estimate covariance; system is singular"
-# warning(message)
-# covar <- matrix(data = NA, nrow = p, ncol = p)
- covar=NULL
+ covar=NULL
} else {
message <- "ok"
rownames(covar) <- colnames(covar) <-pnames
@@ -233,7 +300,6 @@ summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TR
} else message <- "ok"
if(cov && !is.null(covar)) {
-
# Filter the values for t-test, only apply t-test to k-values
t.names <- grep("k(\\d+)|k_(.*)", names(tval), value = TRUE)
t.rest <- setdiff(names(tval), t.names)
@@ -241,19 +307,15 @@ summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TR
t.values[t.rest] <- NA
t.result <- pt(t.values, rdf, lower.tail = FALSE)
- # Now set the values we're not interested in for the lower
- # and upper bound we're not interested in to NA
- t.param = c(param)
- t.param[t.names] <- NA
# calculate the 90% confidence interval
alpha <- 0.10
- lci90 <- t.param + qt(alpha/2,rdf)*se
- uci90 <- t.param + qt(1-alpha/2,rdf)*se
+ lci90 <- c(param) + qt(alpha/2,rdf)*se
+ uci90 <- c(param) + qt(1-alpha/2,rdf)*se
# calculate the 95% confidence interval
alpha <- 0.05
- lci95 <- t.param + qt(alpha/2,rdf)*se
- uci95 <- t.param + qt(1-alpha/2,rdf)*se
+ lci95 <- c(param) + qt(alpha/2,rdf)*se
+ uci95 <- c(param) + qt(1-alpha/2,rdf)*se
param <- cbind(param, se, tval, t.result, lci90, uci90, lci95, uci95)
dimnames(param) <- list(pnames, c("Estimate", "Std. Error",
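A small numeric sketch of the test and interval construction above (all values hypothetical):

  est <- 0.12; se <- 0.02; rdf <- 10
  pt(est/se, rdf, lower.tail = FALSE)   # one-sided p-value, as applied to the k parameters
  est + qt(c(0.05, 0.95), rdf) * se     # 90% confidence interval
  est + qt(c(0.025, 0.975), rdf) * se   # 95% confidence interval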
@@ -282,15 +344,19 @@ summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TR
ans$diffs <- object$diffs
if(data) {
- ans$data <- object$data
- ans$additionalstats = CakeAdditionalStats(object$data)
+ ans$data <- object$data
+ ans$additionalstats = CakeAdditionalStats(object$data)
}
ans$start <- object$start
ans$fixed <- object$fixed
ans$errmin <- object$errmin
ans$penalties <- object$penalties
if(distimes) ans$distimes <- object$distimes
- if(halflives) ans$extraDT50 <- object$extraDT50
+ if(halflives){
+ ans$extraDT50 <- object$extraDT50
+ ans$ioreRepDT <- object$ioreRepDT
+ ans$fomcRepDT <- object$fomcRepDT
+ }
if(ff) ans$ff <- object$ff
if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB
class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.cakeModFit")
@@ -334,20 +400,24 @@ print.summary.CakeFit <- function(x, digits = max(3, getOption("digits") - 3), .
if(printextraDT50){
cat("\nHalf Lives for Extra k values:\n")
print(x$extraDT50, digits=digits,...)
+ }
+
+ printIoreRepDT <- !is.null(x$ioreRepDT)
+ if (printIoreRepDT){
+ cat("\nRepresentative Half Life for IORE:", format(signif(x$ioreRepDT, digits)))
}
+ printFomcRepDT <- !is.null(x$fomcRepDT)
+ if (printFomcRepDT){
+ cat("\nBack-calculated Half Life for FOMC:", format(signif(x$fomcRepDT, digits)))
+ }
+
printff <- !is.null(x$ff)
if(printff){
cat("\nEstimated formation fractions:\n")
print(data.frame(ff = x$ff), digits=digits,...)
}
- printSFORB <- !is.null(x$SFORB)
- if(printSFORB){
- cat("\nEstimated Eigenvalues of SFORB model(s):\n")
- print(x$SFORB, digits=digits,...)
- }
-
printcor <- !is.null(x$cov.unscaled)
if (printcor){
Corr <- cov2cor(x$cov.unscaled)
@@ -363,13 +433,13 @@ print.summary.CakeFit <- function(x, digits = max(3, getOption("digits") - 3), .
}
if(!is.null(x$additionalstats)){
- cat("\nAdditional Stats:\n");
- print(x$additionalstats, digits=digits, ...)
+ cat("\nAdditional Stats:\n");
+ print(x$additionalstats, digits=digits, ...)
}
if(!(is.null(x$penalties) || 0 == dim(x$penalties)[[1]])){
- cat("\nPenalties:\n");
- print(x$penalties, digits=digits, row.names = FALSE, ...)
+ cat("\nPenalties:\n");
+ print(x$penalties, digits=digits, row.names = FALSE, ...)
}
if ( !is.null(x$seed) ) {
