From b76e401a854021eaeda6f8ba262baf37b4ecfac2 Mon Sep 17 00:00:00 2001
From: Johannes Ranke <jranke@uni-bremen.de>
Date: Thu, 13 Oct 2022 03:51:22 +0200
Subject: Select best fit from multistart, use in parhist

- Add 'best' and 'which.best' generics with methods for multistart
objects
- Per default, scale the parameters in parhist plots using the fit with
the highest log likelihood.
---
 NAMESPACE         |  4 ++++
 R/multistart.R    | 40 +++++++++++++++++++++++++++++++++++++++-
 R/parhist.R       | 42 +++++++++++++++++++++++++++++-------------
 log/test.log      | 26 +++++++++++++-------------
 man/multistart.Rd | 20 +++++++++++++++++++-
 man/parhist.Rd    | 16 +++++++++++++---
 6 files changed, 117 insertions(+), 31 deletions(-)
diff --git a/NAMESPACE b/NAMESPACE
index f6c5ebf9..3b4d2367 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -10,6 +10,7 @@ S3method(aw,mixed.mmkin)
 S3method(aw,mkinfit)
 S3method(aw,mmkin)
 S3method(aw,multistart)
+S3method(best,default)
 S3method(confint,mkinfit)
 S3method(convergence,mhmkin)
 S3method(convergence,mmkin)
@@ -71,6 +72,7 @@ S3method(update,mkinfit)
 S3method(update,mmkin)
 S3method(update,nlme.mmkin)
 S3method(update,saem.mmkin)
+S3method(which.best,default)
 export(CAKE_export)
 export(DFOP.solution)
 export(FOMC.solution)
@@ -81,6 +83,7 @@ export(SFORB.solution)
 export(add_err)
 export(aw)
 export(backtransform_odeparms)
+export(best)
 export(convergence)
 export(create_deg_func)
 export(endpoints)
@@ -133,6 +136,7 @@ export(set_nd_nq)
 export(set_nd_nq_focus)
 export(sigma_twocomp)
 export(transform_odeparms)
+export(which.best)
 import(deSolve)
 import(graphics)
 import(nlme)
diff --git a/R/multistart.R b/R/multistart.R
index b65c0bee..a788953e 100644
--- a/R/multistart.R
+++ b/R/multistart.R
@@ -47,8 +47,10 @@
 #' f_saem_full <- saem(f_mmkin)
 #' f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16)
 #' parhist(f_saem_full_multi, lpos = "bottomright")
+#' illparms(f_saem_full)
 #'
-#' f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1)))
+#' f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2")
+#' illparms(f_saem_reduced)
 #' # On Windows, we need to create a cluster first. When working with
 #' # such a cluster, we need to export the mmkin object to the cluster
 #' # nodes, as it is referred to when updating the saem object on the nodes.
@@ -140,3 +142,39 @@ print.multistart <- function(x, ...) {
   cat("<multistart> object with", length(x), "fits:\n")
   print(convergence(x))
 }
+
+#' @rdname multistart
+#' @export
+best <- function(object, ...)
+{
+  UseMethod("best", object)
+}
+
+#' @export
+#' @return The object with the highest likelihood
+#' @rdname multistart
+best.default <- function(object, ...)
+{
+  return(object[[which.best(object)]])
+}
+
+#' @return The index of the object with the highest likelihood
+#' @rdname multistart
+#' @export
+which.best <- function(object, ...)
+{
+  UseMethod("which.best", object)
+}
+
+#' @rdname multistart
+#' @export
+which.best.default <- function(object, ...)
+{
+  llfunc <- function(object) {
+    ret <- try(logLik(object))
+    if (inherits(ret, "try-error")) return(NA)
+    else return(ret)
+  }
+  ll <- sapply(object, llfunc)
+  return(which.max(ll))
+}
diff --git a/R/parhist.R b/R/parhist.R
index 10730873..5d498664 100644
--- a/R/parhist.R
+++ b/R/parhist.R
@@ -1,12 +1,15 @@
 #' Plot parameter distributions from multistart objects
 #'
-#' Produces a boxplot with all parameters from the multiple runs, divided by
-#' using their medians as in the paper by Duchesne et al. (2021).
+#' Produces a boxplot with all parameters from the multiple runs, scaled
+#' either by the parameters of the run with the highest likelihood,
+#' or by their medians as proposed in the paper by Duchesne et al. (2021).
 #'
 #' @param object The [multistart] object
-#' @param \dots Passed to [boxplot]
+#' @param scale By default, scale parameters using the best available fit.
+#' If 'median', parameters are scaled using the median parameters from all fits.
 #' @param main Title of the plot
 #' @param lpos Positioning of the legend.
+#' @param \dots Passed to [boxplot]
 #' @references Duchesne R, Guillemin A, Gandrillon O, Crauste F. Practical
 #' identifiability in the frame of nonlinear mixed effects models: the example
 #' of the in vitro erythropoiesis. BMC Bioinformatics. 2021 Oct 4;22(1):478.
@@ -14,7 +17,9 @@
 #' @seealso [multistart]
 #' @importFrom stats median
 #' @export
-parhist <- function(object, lpos = "bottomleft", main = "", ...) {
+parhist <- function(object, scale = c("best", "median"),
+  lpos = "bottomleft", main = "", ...)
+{
   oldpar <- par(no.readonly = TRUE)
   on.exit(par(oldpar, no.readonly = TRUE))
 
@@ -48,23 +53,34 @@ parhist <- function(object, lpos = "bottomleft", main = "", ...) {
     colnames(all_parms)[1:length(degparm_names)] <- degparm_names
   }
 
-  median_parms <- apply(all_parms, 2, median)
-  start_scaled_parms <- rep(NA_real_, length(orig_parms))
-  names(start_scaled_parms) <- names(orig_parms)
+  scale <- match.arg(scale)
+  parm_scale <- switch(scale,
+    best = all_parms[which.best(object), ],
+    median = apply(all_parms, 2, median)
+  )
 
-  orig_scaled_parms <- orig_parms / median_parms
-  all_scaled_parms <- t(apply(all_parms, 1, function(x) x / median_parms))
-  start_scaled_parms[names(start_parms)] <-
-    start_parms / median_parms[names(start_parms)]
+  # Boxplots of all scaled parameters
+  all_scaled_parms <- t(apply(all_parms, 1, function(x) x / parm_scale))
   boxplot(all_scaled_parms, log = "y", main = main, ,
     ylab = "Normalised parameters", ...)
 
-  points(orig_scaled_parms, col = 2, cex = 2)
+  # Show starting parameters
+  start_scaled_parms <- rep(NA_real_, length(orig_parms))
+  names(start_scaled_parms) <- names(orig_parms)
+  start_scaled_parms[names(start_parms)] <-
+    start_parms / parm_scale[names(start_parms)]
   points(start_scaled_parms, col = 3, cex = 3)
+
+  # Show parameters of original run
+  orig_scaled_parms <- orig_parms / parm_scale
+  points(orig_scaled_parms, col = 2, cex = 2)
+
+  abline(h = 1, lty = 2)
+
   legend(lpos, inset = c(0.05, 0.05), bty = "n",
     pch = 1, col = 3:1, lty = c(NA, NA, 1),
     legend = c(
       "Starting parameters",
-      "Converged parameters",
+      "Original run",
       "Multistart runs"))
 }
diff --git a/log/test.log b/log/test.log
index 942ed50d..6cd9e6a8 100644
--- a/log/test.log
+++ b/log/test.log
@@ -5,18 +5,18 @@
 ✔ |         5 | Calculation of Akaike weights
 ✔ |         3 | Export dataset for reading into CAKE
 ✔ |        12 | Confidence intervals and p-values [1.0s]
-✔ |     1  12 | Dimethenamid data from 2018 [31.6s]
+✔ |     1  12 | Dimethenamid data from 2018 [31.1s]
 ────────────────────────────────────────────────────────────────────────────────
 Skip (test_dmta.R:98:3): Different backends get consistent results for SFO-SFO3+, dimethenamid data
 Reason: Fitting this ODE model with saemix takes about 15 minutes on my system
 ────────────────────────────────────────────────────────────────────────────────
-✔ |        14 | Error model fitting [5.0s]
+✔ |        14 | Error model fitting [4.9s]
 ✔ |         5 | Time step normalisation
 ✔ |         4 | Calculation of FOCUS chi2 error levels [0.6s]
 ✔ |        14 | Results for FOCUS D established in expertise for UBA (Ranke 2014) [0.8s]
-✔ |         4 | Test fitting the decline of metabolites from their maximum [0.3s]
+✔ |         4 | Test fitting the decline of metabolites from their maximum [0.4s]
 ✔ |         1 | Fitting the logistic model [0.2s]
-✔ |         7 | Batch fitting and diagnosing hierarchical kinetic models [14.5s]
+✔ |         7 | Batch fitting and diagnosing hierarchical kinetic models [14.4s]
 ✔ |     1  12 | Nonlinear mixed-effects models [0.3s]
 ────────────────────────────────────────────────────────────────────────────────
 Skip (test_mixed.R:68:3): saemix results are reproducible for biphasic fits
@@ -26,23 +26,23 @@ Reason: Fitting with saemix takes around 10 minutes when using deSolve
 ✔ |        10 | Special cases of mkinfit calls [0.4s]
 ✔ |         3 | mkinfit features [0.7s]
 ✔ |         8 | mkinmod model generation and printing [0.2s]
-✔ |         3 | Model predictions with mkinpredict [0.3s]
-✔ |        16 | Evaluations according to 2015 NAFTA guidance [1.7s]
-✔ |         9 | Nonlinear mixed-effects models with nlme [8.6s]
-✔ |        16 | Plotting [9.7s]
+✔ |         3 | Model predictions with mkinpredict [0.4s]
+✔ |        16 | Evaluations according to 2015 NAFTA guidance [1.8s]
+✔ |         9 | Nonlinear mixed-effects models with nlme [8.5s]
+✔ |        16 | Plotting [9.8s]
 ✔ |         4 | Residuals extracted from mkinfit models
-✔ |        35 | saemix parent models [191.8s]
+✔ |        35 | saemix parent models [189.7s]
 ✔ |         2 | Complex test case from Schaefer et al. (2007) Piacenza paper [1.4s]
 ✔ |        11 | Processing of residue series
-✔ |         7 | Fitting the SFORB model [3.8s]
+✔ |         7 | Fitting the SFORB model [3.6s]
 ✔ |         1 | Summaries of old mkinfit objects
 ✔ |         5 | Summary [0.2s]
-✔ |         4 | Results for synthetic data established in expertise for UBA (Ranke 2014) [2.2s]
+✔ |         4 | Results for synthetic data established in expertise for UBA (Ranke 2014) [2.1s]
 ✔ |         9 | Hypothesis tests [7.8s]
-✔ |         4 | Calculation of maximum time weighted average concentrations (TWAs) [2.2s]
+✔ |         4 | Calculation of maximum time weighted average concentrations (TWAs) [2.1s]
 
 ══ Results ═════════════════════════════════════════════════════════════════════
-Duration: 288.9 s
+Duration: 286.0 s
 
 ── Skipped tests  ──────────────────────────────────────────────────────────────
 • Fitting this ODE model with saemix takes about 15 minutes on my system (1)
diff --git a/man/multistart.Rd b/man/multistart.Rd
index 78ff4614..d20c0465 100644
--- a/man/multistart.Rd
+++ b/man/multistart.Rd
@@ -4,6 +4,10 @@
 \alias{multistart}
 \alias{multistart.saem.mmkin}
 \alias{print.multistart}
+\alias{best}
+\alias{best.default}
+\alias{which.best}
+\alias{which.best.default}
 \title{Perform a hierarchical model fit with multiple starting values}
 \usage{
 multistart(
@@ -17,6 +21,14 @@ multistart(
 \method{multistart}{saem.mmkin}(object, n = 50, cores = 1, cluster = NULL, ...)
 
 \method{print}{multistart}(x, ...)
+
+best(object, ...)
+
+\method{best}{default}(object, ...)
+
+which.best(object, ...)
+
+\method{which.best}{default}(object, ...)
 }
 \arguments{
 \item{object}{The fit object to work with}
@@ -36,6 +48,10 @@ for parallel execution.}
 \value{
 A list of \link{saem.mmkin} objects, with class attributes
 'multistart.saem.mmkin' and 'multistart'.
+
+The object with the highest likelihood
+
+The index of the object with the highest likelihood
 }
 \description{
 The purpose of this method is to check if a certain algorithm for fitting
@@ -69,8 +85,10 @@ f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE)
 f_saem_full <- saem(f_mmkin)
 f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16)
 parhist(f_saem_full_multi, lpos = "bottomright")
+illparms(f_saem_full)
 
-f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1)))
+f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2")
+illparms(f_saem_reduced)
 # On Windows, we need to create a cluster first. When working with
 # such a cluster, we need to export the mmkin object to the cluster
 # nodes, as it is referred to when updating the saem object on the nodes.
diff --git a/man/parhist.Rd b/man/parhist.Rd
index a8319283..67bbadad 100644
--- a/man/parhist.Rd
+++ b/man/parhist.Rd
@@ -4,11 +4,20 @@
 \alias{parhist}
 \title{Plot parameter distributions from multistart objects}
 \usage{
-parhist(object, lpos = "bottomleft", main = "", ...)
+parhist(
+  object,
+  scale = c("best", "median"),
+  lpos = "bottomleft",
+  main = "",
+  ...
+)
 }
 \arguments{
 \item{object}{The \link{multistart} object}
 
+\item{scale}{By default, scale parameters using the best available fit.
+If 'median', parameters are scaled using the median parameters from all fits.}
+
 \item{lpos}{Positioning of the legend.}
 
 \item{main}{Title of the plot}
@@ -16,8 +25,9 @@ parhist(object, lpos = "bottomleft", main = "", ...)
 \item{\dots}{Passed to \link{boxplot}}
 }
 \description{
-Produces a boxplot with all parameters from the multiple runs, divided by
-using their medians as in the paper by Duchesne et al. (2021).
+Produces a boxplot with all parameters from the multiple runs, scaled
+either by the parameters of the run with the highest likelihood,
+or by their medians as proposed in the paper by Duchesne et al. (2021).
 }
 \references{
 Duchesne R, Guillemin A, Gandrillon O, Crauste F. Practical
-- 
cgit v1.2.3