From fd205e13061de8abc595d266f3b0c7650773d442 Mon Sep 17 00:00:00 2001
From: Johannes Ranke <jranke@uni-bremen.de>
Date: Fri, 16 Sep 2022 21:06:54 +0200
Subject: Improve multistart documentation, bugfix

- Split out llhist and parhist documentation
- Add example code for multistart
- Create a multistart vignette, because the example code fails when run
by pkgdown
- Fix multistart for the case of mkin transformations in the saem fit
---
 R/llhist.R     | 30 +++++++++++++++++++
 R/multistart.R | 91 +++++++++++++++++++++++-----------------------------------
 R/parhist.R    | 52 +++++++++++++++++++++++++++++++++
 3 files changed, 118 insertions(+), 55 deletions(-)
 create mode 100644 R/llhist.R
 create mode 100644 R/parhist.R

(limited to 'R')

diff --git a/R/llhist.R b/R/llhist.R
new file mode 100644
index 00000000..6a6d93cd
--- /dev/null
+++ b/R/llhist.R
@@ -0,0 +1,30 @@
+#' Plot the distribution of log likelihoods from multistart objects
+#'
+#' Produces a histogram of log-likelihoods, and an overlayed kernel density 
+#' estimate. In addition, the likelihood of the original fit is shown as
+#' a red vertical line.
+#'
+#' @param object The [multistart] object
+#' @param breaks Passed to [hist]
+#' @param lpos Positioning of the legend.
+#' @param main Title of the plot
+#' @param \dots Passed to [hist]
+#' @seealso [multistart]
+#' @importFrom KernSmooth bkde
+#' @export
+llhist <- function(object, breaks = "Sturges", lpos = "topleft", main = "", ...) {
+  ll <- sapply(object, logLik)
+  kde <- KernSmooth::bkde(ll)
+  h <- hist(ll, freq = TRUE,
+    xlim = range(kde$x),
+    xlab = "", main = main,
+    ylab = "Frequency of log likelihoods", breaks = breaks, ...)
+
+  freq_factor <- h$counts[1] / h$density[1]
+  lines(kde$x, freq_factor * kde$y)
+  abline(v = logLik(attr(object, "orig")), col = 2)
+  legend(lpos, inset = c(0.05, 0.05), bty = "n",
+    lty = 1, col = c(2, 1),
+    legend = c("original log likelihood",
+      "kernel density estimate"))
+}
diff --git a/R/multistart.R b/R/multistart.R
index 52f279f0..6fd98522 100644
--- a/R/multistart.R
+++ b/R/multistart.R
@@ -10,24 +10,49 @@
 #' Currently, parallel execution of the fits is only supported using
 #' [parallel::mclapply], i.e. not available on Windows.
 #'
+#' In case the online version of this help page contains error messages
+#' in the example code and no plots, this is due to the multistart method
+#' not working when called by pkgdown. Please refer to the 
+#' [online vignette](https://pkgdown.jrwb.de/mkin/dev/articles/web_only/multistart.html)
+#' in this case.
+#'
 #' @param object The fit object to work with
 #' @param n How many different combinations of starting parameters should be
 #' used?
 #' @param cores How many fits should be run in parallel?
-#' @param \dots Passed to the update function, or to the basic plotting
-#' function in the case of the graphical functions.
+#' @param \dots Passed to the update function.
 #' @param x The multistart object to print
-#' @param breaks Passed to [hist]
-#' @param main title of the plot
-#' @param lpos Positioning of the legend.
 #' @return A list of [saem.mmkin] objects, with class attributes
 #' 'multistart.saem.mmkin' and 'multistart'.
+#' @seealso [parhist], [llhist]
 #'
 #' @references Duchesne R, Guillemin A, Gandrillon O, Crauste F. Practical
 #' identifiability in the frame of nonlinear mixed effects models: the example
 #' of the in vitro erythropoiesis. BMC Bioinformatics. 2021 Oct 4;22(1):478.
 #' doi: 10.1186/s12859-021-04373-4.
 #' @export
+#' @examples
+#' \dontrun{
+#' library(mkin)
+#' dmta_ds <- lapply(1:7, function(i) {
+#'   ds_i <- dimethenamid_2018$ds[[i]]$data
+#'   ds_i[ds_i$name == "DMTAP", "name"] <-  "DMTA"
+#'   ds_i$time <- ds_i$time * dimethenamid_2018$f_time_norm[i]
+#'   ds_i
+#' })
+#' names(dmta_ds) <- sapply(dimethenamid_2018$ds, function(ds) ds$title)
+#' dmta_ds[["Elliot"]] <- rbind(dmta_ds[["Elliot 1"]], dmta_ds[["Elliot 2"]])
+#' dmta_ds[["Elliot 1"]] <- dmta_ds[["Elliot 2"]] <- NULL
+#'
+#' f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE)
+#' f_saem_full <- saem(f_mmkin)
+#' f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16)
+#' parhist(f_saem_full_multi, lpos = "bottomright")
+#'
+#' f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1)))
+#' f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16)
+#' parhist(f_saem_reduced_multi, lpos = "bottomright")
+#' }
 multistart <- function(object, n = 50, cores = 1, ...)
 {
   UseMethod("multistart", object)
@@ -36,10 +61,13 @@ multistart <- function(object, n = 50, cores = 1, ...)
 #' @rdname multistart
 #' @export
 multistart.saem.mmkin <- function(object, n = 50, cores = 1, ...) {
+
+  mmkin_parms <- parms(object$mmkin, errparms = FALSE,
+    transformed = object$transformations == "mkin")
   start_parms <- apply(
-    parms(object$mmkin, errparms = FALSE), 1,
-      function(x) stats::runif(n, min(x), max(x))
-  )
+    mmkin_parms, 1,
+    function(x) stats::runif(n, min(x), max(x)))
+  if (n == 1) dim(start_parms) <- c(1, length(start_parms))
 
   res <- parallel::mclapply(1:n, function(x) {
     update(object, degparms_start = start_parms[x, ], ...)
@@ -62,50 +90,3 @@ print.multistart <- function(x, ...) {
 parms.multistart <- function(object, ...) {
   t(sapply(object, parms))
 }
-
-#' @rdname multistart
-#' @importFrom stats median
-#' @export
-parhist <- function(object, lpos = "topleft", main = "", ...) {
-  orig <- attr(object, "orig")
-  orig_parms <- parms(orig)
-  start_parms <- orig$mean_dp_start
-  all_parms <- parms(object)
-  median_parms <- apply(all_parms, 2, median)
-  all_scaled_parms <- t(apply(all_parms, 1, function(x) x / median_parms))
-  orig_scaled_parms <- orig_parms / median_parms
-  start_scaled_parms <- rep(NA_real_, length(orig_parms))
-  names(start_scaled_parms) <- names(orig_parms)
-  start_scaled_parms[names(start_parms)] <-
-    start_parms / median_parms[names(start_parms)]
-
-  boxplot(all_scaled_parms, log = "y", main = main, ...)
-  points(orig_scaled_parms, col = 2, cex = 2)
-  points(start_scaled_parms, col = 3, cex = 3)
-  legend(lpos, inset = c(0.05, 0.05), bty = "n",
-    pch = 1, col = 3:1, lty = c(NA, NA, 1),
-    legend = c(
-      "Starting parameters",
-      "Converged parameters",
-      "Multistart runs"))
-}
-
-#' @rdname multistart
-#' @importFrom KernSmooth bkde
-#' @export
-llhist <- function(object, breaks = "Sturges", lpos = "topleft", main = "", ...) {
-  ll <- sapply(object, logLik)
-  kde <- KernSmooth::bkde(ll)
-  h <- hist(ll, freq = TRUE,
-    xlim = range(kde$x),
-    xlab = "", main = main,
-    ylab = "Frequency of log likelihoods", breaks = breaks, ...)
-
-  freq_factor <- h$counts[1] / h$density[1]
-  lines(kde$x, freq_factor * kde$y)
-  abline(v = logLik(attr(object, "orig")), col = 2)
-  legend(lpos, inset = c(0.05, 0.05), bty = "n",
-    lty = 1, col = c(2, 1),
-    legend = c("original log likelihood",
-      "kernel density estimate"))
-}
diff --git a/R/parhist.R b/R/parhist.R
new file mode 100644
index 00000000..62f67249
--- /dev/null
+++ b/R/parhist.R
@@ -0,0 +1,52 @@
+#' Plot parameter distributions from multistart objects
+#'
+#' Produces a boxplot with all parameters from the multiple runs, scaled using
+#' their medians. If parameter transformations were done by mkin (default in
+#' [saem]), then the parameters found by saem are on the transformed scale, and
+#' scaling is simply done by subtracting the median. If parameter
+#' transformations were done by saemix, scaling is done by dividing by the
+#' median, as in the paper by Duchesne et al. (2021).
+#'
+#' @param object The [multistart] object
+#' @param \dots Passed to [boxplot]
+#' @param main Title of the plot
+#' @param lpos Positioning of the legend.
+#' @references Duchesne R, Guillemin A, Gandrillon O, Crauste F. Practical
+#' identifiability in the frame of nonlinear mixed effects models: the example
+#' of the in vitro erythropoiesis. BMC Bioinformatics. 2021 Oct 4;22(1):478.
+#' doi: 10.1186/s12859-021-04373-4.
+#' @seealso [multistart]
+#' @importFrom stats median
+#' @export
+parhist <- function(object, lpos = "topleft", main = "", ...) {
+  orig <- attr(object, "orig")
+  orig_parms <- parms(orig)
+  start_parms <- orig$mean_dp_start
+  all_parms <- parms(object)
+  median_parms <- apply(all_parms, 2, median)
+  start_scaled_parms <- rep(NA_real_, length(orig_parms))
+  names(start_scaled_parms) <- names(orig_parms)
+
+  if (orig$transformations == "saemix") {
+    orig_scaled_parms <- orig_parms / median_parms
+    all_scaled_parms <- t(apply(all_parms, 1, function(x) x / median_parms))
+    start_scaled_parms[names(start_parms)] <-
+      start_parms / median_parms[names(start_parms)]
+    boxplot(all_scaled_parms, log = "y", main = main, ...)
+  } else {
+    orig_scaled_parms <- orig_parms - median_parms
+    all_scaled_parms <- t(apply(all_parms, 1, function(x) x - median_parms))
+    start_scaled_parms[names(start_parms)] <-
+      start_parms - median_parms[names(start_parms)]
+    boxplot(all_scaled_parms, main = main, ...)
+  }
+
+  points(orig_scaled_parms, col = 2, cex = 2)
+  points(start_scaled_parms, col = 3, cex = 3)
+  legend(lpos, inset = c(0.05, 0.05), bty = "n",
+    pch = 1, col = 3:1, lty = c(NA, NA, 1),
+    legend = c(
+      "Starting parameters",
+      "Converged parameters",
+      "Multistart runs"))
+}
-- 
cgit v1.2.3