From 7a3f2ee22419608a8a634fd4d71d7176303b2f41 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Mon, 19 Sep 2022 12:55:00 +0200 Subject: Improve parhist and llhist In particular, adapt the display of parameter boxplots for saem fits using mkin transformations to the way used for saem fits using saemix transformations, i.e. always show parameters on the natural scale, and normalised them by dividing by the median from the multiple runs. --- R/llhist.R | 7 ++- R/multistart.R | 4 +- R/parhist.R | 58 ++++++++++++++------- docs/dev/articles/web_only/multistart.html | 6 +-- .../figure-html/unnamed-chunk-2-1.png | Bin 52067 -> 61772 bytes .../figure-html/unnamed-chunk-3-1.png | Bin 50142 -> 56540 bytes .../figure-html/unnamed-chunk-4-1.png | Bin 39793 -> 39866 bytes docs/dev/pkgdown.yml | 2 +- docs/dev/reference/parhist.html | 23 ++++---- man/parhist.Rd | 10 ++-- vignettes/web_only/multistart.R | 7 ++- vignettes/web_only/multistart.html | 13 ++--- vignettes/web_only/multistart.rmd | 6 +-- 13 files changed, 77 insertions(+), 59 deletions(-) diff --git a/R/llhist.R b/R/llhist.R index 6a6d93cd..b7f6de21 100644 --- a/R/llhist.R +++ b/R/llhist.R @@ -1,6 +1,6 @@ #' Plot the distribution of log likelihoods from multistart objects #' -#' Produces a histogram of log-likelihoods, and an overlayed kernel density +#' Produces a histogram of log-likelihoods, and an overlayed kernel density #' estimate. In addition, the likelihood of the original fit is shown as #' a red vertical line. #' @@ -13,8 +13,13 @@ #' @importFrom KernSmooth bkde #' @export llhist <- function(object, breaks = "Sturges", lpos = "topleft", main = "", ...) { + oldpar <- par(no.readonly = TRUE) + on.exit(par(oldpar, no.readonly = TRUE)) + ll <- sapply(object, logLik) kde <- KernSmooth::bkde(ll) + + par(las = 1) h <- hist(ll, freq = TRUE, xlim = range(kde$x), xlab = "", main = main, diff --git a/R/multistart.R b/R/multistart.R index 6fd98522..beb8194b 100644 --- a/R/multistart.R +++ b/R/multistart.R @@ -12,7 +12,7 @@ #' #' In case the online version of this help page contains error messages #' in the example code and no plots, this is due to the multistart method -#' not working when called by pkgdown. Please refer to the +#' not working when called by pkgdown. Please refer to the #' [online vignette](https://pkgdown.jrwb.de/mkin/dev/articles/web_only/multistart.html) #' in this case. #' @@ -61,13 +61,13 @@ multistart <- function(object, n = 50, cores = 1, ...) #' @rdname multistart #' @export multistart.saem.mmkin <- function(object, n = 50, cores = 1, ...) { + if (n <= 1) stop("Please specify an n of at least 2") mmkin_parms <- parms(object$mmkin, errparms = FALSE, transformed = object$transformations == "mkin") start_parms <- apply( mmkin_parms, 1, function(x) stats::runif(n, min(x), max(x))) - if (n == 1) dim(start_parms) <- c(1, length(start_parms)) res <- parallel::mclapply(1:n, function(x) { update(object, degparms_start = start_parms[x, ], ...) diff --git a/R/parhist.R b/R/parhist.R index 62f67249..69aafe02 100644 --- a/R/parhist.R +++ b/R/parhist.R @@ -1,11 +1,7 @@ #' Plot parameter distributions from multistart objects #' -#' Produces a boxplot with all parameters from the multiple runs, scaled using -#' their medians. If parameter transformations were done by mkin (default in -#' [saem]), then the parameters found by saem are on the transformed scale, and -#' scaling is simply done by subtracting the median. If parameter -#' transformations were done by saemix, scaling is done by dividing by the -#' median, as in the paper by Duchesne et al. (2021). +#' Produces a boxplot with all parameters from the multiple runs, divided by +#' using their medians as in the paper by Duchesne et al. (2021). #' #' @param object The [multistart] object #' @param \dots Passed to [boxplot] @@ -18,28 +14,50 @@ #' @seealso [multistart] #' @importFrom stats median #' @export -parhist <- function(object, lpos = "topleft", main = "", ...) { +parhist <- function(object, lpos = "bottomleft", main = "", ...) { + oldpar <- par(no.readonly = TRUE) + on.exit(par(oldpar, no.readonly = TRUE)) + orig <- attr(object, "orig") orig_parms <- parms(orig) start_parms <- orig$mean_dp_start all_parms <- parms(object) + + par(las = 1) + if (orig$transformations == "mkin") { + degparm_names_transformed <- names(start_parms) + degparm_index <- which(names(orig_parms) %in% degparm_names_transformed) + orig_parms[degparm_names_transformed] <- backtransform_odeparms( + orig_parms[degparm_names_transformed], + orig$mmkin$mkinmod, + transform_rates = orig$mmkin[[1]]$transform_rates, + transform_fractions = orig$mmkin[[1]]$transform_fractions) + start_parms <- backtransform_odeparms(start_parms, + orig$mmkin$mkinmod, + transform_rates = orig$mmkin[[1]]$transform_rates, + transform_fractions = orig$mmkin[[1]]$transform_fractions) + degparm_names <- names(start_parms) + + names(orig_parms) <- c(degparm_names, names(orig_parms[-degparm_index])) + + all_parms[, degparm_names_transformed] <- + t(apply(all_parms[, degparm_names_transformed], 1, backtransform_odeparms, + orig$mmkin$mkinmod, + transform_rates = orig$mmkin[[1]]$transform_rates, + transform_fractions = orig$mmkin[[1]]$transform_fractions)) + colnames(all_parms)[1:length(degparm_names)] <- degparm_names + } + median_parms <- apply(all_parms, 2, median) start_scaled_parms <- rep(NA_real_, length(orig_parms)) names(start_scaled_parms) <- names(orig_parms) - if (orig$transformations == "saemix") { - orig_scaled_parms <- orig_parms / median_parms - all_scaled_parms <- t(apply(all_parms, 1, function(x) x / median_parms)) - start_scaled_parms[names(start_parms)] <- - start_parms / median_parms[names(start_parms)] - boxplot(all_scaled_parms, log = "y", main = main, ...) - } else { - orig_scaled_parms <- orig_parms - median_parms - all_scaled_parms <- t(apply(all_parms, 1, function(x) x - median_parms)) - start_scaled_parms[names(start_parms)] <- - start_parms - median_parms[names(start_parms)] - boxplot(all_scaled_parms, main = main, ...) - } + orig_scaled_parms <- orig_parms / median_parms + all_scaled_parms <- t(apply(all_parms, 1, function(x) x / median_parms)) + start_scaled_parms[names(start_parms)] <- + start_parms / median_parms[names(start_parms)] + boxplot(all_scaled_parms, log = "y", main = main, , + ylab = "Normalised parameters", ...) points(orig_scaled_parms, col = 2, cex = 2) points(start_scaled_parms, col = 3, cex = 3) diff --git a/docs/dev/articles/web_only/multistart.html b/docs/dev/articles/web_only/multistart.html index b3108120..f9b27a0c 100644 --- a/docs/dev/articles/web_only/multistart.html +++ b/docs/dev/articles/web_only/multistart.html @@ -109,7 +109,7 @@

Short demo of the multistart method

Johannes Ranke

-

Last change 16 September 2022 (rebuilt 2022-09-16)

+

Last change 19 September 2022 (rebuilt 2022-09-19)

Source: vignettes/web_only/multistart.rmd @@ -135,13 +135,13 @@ f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE) f_saem_full <- saem(f_mmkin) f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) -parhist(f_saem_full_multi, lpos = "bottomright") +parhist(f_saem_full_multi)

We see that not all variability parameters are identifiable, most problematic is the variance of k2. So we reduce the parameter distribution model by removing the intersoil variability for this parameter.

 f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1)))
 f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16)
-parhist(f_saem_reduced_multi, lpos = "bottomright")
+parhist(f_saem_reduced_multi, lpos = "topright")

We can also analyse the log-likelihoods obtained in the multiple runs:

diff --git a/docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-2-1.png b/docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-2-1.png
index e30e56f0..18800fb9 100644
Binary files a/docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-2-1.png and b/docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-2-1.png differ
diff --git a/docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-3-1.png b/docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-3-1.png
index 3ed0d546..908989e9 100644
Binary files a/docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-3-1.png and b/docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-3-1.png differ
diff --git a/docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-4-1.png b/docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-4-1.png
index 957c2457..e50177cc 100644
Binary files a/docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-4-1.png and b/docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-4-1.png differ
diff --git a/docs/dev/pkgdown.yml b/docs/dev/pkgdown.yml
index c9b168a6..144a7f80 100644
--- a/docs/dev/pkgdown.yml
+++ b/docs/dev/pkgdown.yml
@@ -12,7 +12,7 @@ articles:
   compiled_models: web_only/compiled_models.html
   dimethenamid_2018: web_only/dimethenamid_2018.html
   multistart: web_only/multistart.html
-last_built: 2022-09-16T19:23Z
+last_built: 2022-09-19T10:47Z
 urls:
   reference: https://pkgdown.jrwb.de/mkin/reference
   article: https://pkgdown.jrwb.de/mkin/articles
diff --git a/docs/dev/reference/parhist.html b/docs/dev/reference/parhist.html
index a2fd717b..bc230c21 100644
--- a/docs/dev/reference/parhist.html
+++ b/docs/dev/reference/parhist.html
@@ -1,10 +1,6 @@
 
-Plot parameter distributions from multistart objects — parhist • mkinPlot parameter distributions from multistart objects — parhist • mkin
@@ -49,11 +45,14 @@ median, as in the paper by Duchesne et al. (2021).">Example evaluations of dimethenamid data from 2018 with nonlinear mixed-effects models
     
     
  • - Example evaluation of FOCUS Example Dataset Z + Short demo of the multistart method
  • Performance benefit by using compiled model definitions in mkin
  • +
  • + Example evaluation of FOCUS Example Dataset Z +
  • Calculation of time weighted average concentrations with mkin
  • @@ -88,16 +87,12 @@ median, as in the paper by Duchesne et al. (2021).">
    -

    Produces a boxplot with all parameters from the multiple runs, scaled using -their medians. If parameter transformations were done by mkin (default in -saem), then the parameters found by saem are on the transformed scale, and -scaling is simply done by subtracting the median. If parameter -transformations were done by saemix, scaling is done by dividing by the -median, as in the paper by Duchesne et al. (2021).

    +

    Produces a boxplot with all parameters from the multiple runs, divided by +using their medians as in the paper by Duchesne et al. (2021).

    -
    parhist(object, lpos = "topleft", main = "", ...)
    +
    parhist(object, lpos = "bottomleft", main = "", ...)
    diff --git a/man/parhist.Rd b/man/parhist.Rd index 1f517298..a8319283 100644 --- a/man/parhist.Rd +++ b/man/parhist.Rd @@ -4,7 +4,7 @@ \alias{parhist} \title{Plot parameter distributions from multistart objects} \usage{ -parhist(object, lpos = "topleft", main = "", ...) +parhist(object, lpos = "bottomleft", main = "", ...) } \arguments{ \item{object}{The \link{multistart} object} @@ -16,12 +16,8 @@ parhist(object, lpos = "topleft", main = "", ...) \item{\dots}{Passed to \link{boxplot}} } \description{ -Produces a boxplot with all parameters from the multiple runs, scaled using -their medians. If parameter transformations were done by mkin (default in -\link{saem}), then the parameters found by saem are on the transformed scale, and -scaling is simply done by subtracting the median. If parameter -transformations were done by saemix, scaling is done by dividing by the -median, as in the paper by Duchesne et al. (2021). +Produces a boxplot with all parameters from the multiple runs, divided by +using their medians as in the paper by Duchesne et al. (2021). } \references{ Duchesne R, Guillemin A, Gandrillon O, Crauste F. Practical diff --git a/vignettes/web_only/multistart.R b/vignettes/web_only/multistart.R index a860757d..b995c545 100644 --- a/vignettes/web_only/multistart.R +++ b/vignettes/web_only/multistart.R @@ -14,10 +14,13 @@ dmta_ds[["Elliot 1"]] <- dmta_ds[["Elliot 2"]] <- NULL f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE) f_saem_full <- saem(f_mmkin) f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) -parhist(f_saem_full_multi, lpos = "bottomright") +parhist(f_saem_full_multi) ## ----------------------------------------------------------------------------- f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1))) f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16) -parhist(f_saem_reduced_multi, lpos = "bottomright") +parhist(f_saem_reduced_multi, lpos = "topright") + +## ----------------------------------------------------------------------------- +llhist(f_saem_reduced_multi) diff --git a/vignettes/web_only/multistart.html b/vignettes/web_only/multistart.html index 004b62be..5a91584c 100644 --- a/vignettes/web_only/multistart.html +++ b/vignettes/web_only/multistart.html @@ -364,7 +364,7 @@ pre code {

    Short demo of the multistart method

    Johannes Ranke

    -

    Last change 16 September 2022 (rebuilt 2022-09-16)

    +

    Last change 19 September 2022 (rebuilt 2022-09-19)

    @@ -384,16 +384,17 @@ dmta_ds[["Elliot 1"]] <- dmta_ds[["Elliot 2"]] <- NULL
    f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE)
     f_saem_full <- saem(f_mmkin)
     f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16)
    -parhist(f_saem_full_multi, lpos = "bottomright")
    -

    +parhist(f_saem_full_multi)
    +

    We see that not all variability parameters are identifiable, most problematic is the variance of k2. So we reduce the parameter distribution model by removing the intersoil variability for this parameter.

    f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1)))
     f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16)
    -parhist(f_saem_reduced_multi, lpos = "bottomright")
    -

    -

    We can also analyse the log-likelihoods obtained in the multiple runs

    +parhist(f_saem_reduced_multi, lpos = "topright") +

    +

    We can also analyse the log-likelihoods obtained in the multiple runs:

    llhist(f_saem_reduced_multi)

    +

    The one run with the lower likelihood is probably responsible for the outliers in the boxplots above, and caused by some weird starting value combination. In any case, the converged values from the original fit (red circles) appear perfectly acceptable, supporting the choice of starting values made by mkin.

    diff --git a/vignettes/web_only/multistart.rmd b/vignettes/web_only/multistart.rmd index a25b73ce..8f349973 100644 --- a/vignettes/web_only/multistart.rmd +++ b/vignettes/web_only/multistart.rmd @@ -1,7 +1,7 @@ --- title: Short demo of the multistart method author: Johannes Ranke -date: Last change 16 September 2022 (rebuilt `r Sys.Date()`) +date: Last change 19 September 2022 (rebuilt `r Sys.Date()`) output: html_document vignette: > @@ -34,7 +34,7 @@ random effects for all degradation parameters. f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE) f_saem_full <- saem(f_mmkin) f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) -parhist(f_saem_full_multi, lpos = "bottomright") +parhist(f_saem_full_multi) ``` We see that not all variability parameters are identifiable, most problematic @@ -44,7 +44,7 @@ removing the intersoil variability for this parameter. ```{r} f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1))) f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16) -parhist(f_saem_reduced_multi, lpos = "bottomright") +parhist(f_saem_reduced_multi, lpos = "topright") ``` We can also analyse the log-likelihoods obtained in the multiple runs: -- cgit v1.2.1