From b76e401a854021eaeda6f8ba262baf37b4ecfac2 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Thu, 13 Oct 2022 03:51:22 +0200 Subject: Select best fit from multistart, use in parhist - Add 'best' and 'which.best' generics with methods for multistart objects - Per default, scale the parameters in parhist plots using the fit with the highest log likelihood. --- NAMESPACE | 4 ++++ R/multistart.R | 40 +++++++++++++++++++++++++++++++++++++++- R/parhist.R | 42 +++++++++++++++++++++++++++++------------- log/test.log | 26 +++++++++++++------------- man/multistart.Rd | 20 +++++++++++++++++++- man/parhist.Rd | 16 +++++++++++++--- 6 files changed, 117 insertions(+), 31 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index f6c5ebf9..3b4d2367 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ S3method(aw,mixed.mmkin) S3method(aw,mkinfit) S3method(aw,mmkin) S3method(aw,multistart) +S3method(best,default) S3method(confint,mkinfit) S3method(convergence,mhmkin) S3method(convergence,mmkin) @@ -71,6 +72,7 @@ S3method(update,mkinfit) S3method(update,mmkin) S3method(update,nlme.mmkin) S3method(update,saem.mmkin) +S3method(which.best,default) export(CAKE_export) export(DFOP.solution) export(FOMC.solution) @@ -81,6 +83,7 @@ export(SFORB.solution) export(add_err) export(aw) export(backtransform_odeparms) +export(best) export(convergence) export(create_deg_func) export(endpoints) @@ -133,6 +136,7 @@ export(set_nd_nq) export(set_nd_nq_focus) export(sigma_twocomp) export(transform_odeparms) +export(which.best) import(deSolve) import(graphics) import(nlme) diff --git a/R/multistart.R b/R/multistart.R index b65c0bee..a788953e 100644 --- a/R/multistart.R +++ b/R/multistart.R @@ -47,8 +47,10 @@ #' f_saem_full <- saem(f_mmkin) #' f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) #' parhist(f_saem_full_multi, lpos = "bottomright") +#' illparms(f_saem_full) #' -#' f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1))) +#' f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2") +#' illparms(f_saem_reduced) #' # On Windows, we need to create a cluster first. When working with #' # such a cluster, we need to export the mmkin object to the cluster #' # nodes, as it is referred to when updating the saem object on the nodes. @@ -140,3 +142,39 @@ print.multistart <- function(x, ...) { cat(" object with", length(x), "fits:\n") print(convergence(x)) } + +#' @rdname multistart +#' @export +best <- function(object, ...) +{ + UseMethod("best", object) +} + +#' @export +#' @return The object with the highest likelihood +#' @rdname multistart +best.default <- function(object, ...) +{ + return(object[[which.best(object)]]) +} + +#' @return The index of the object with the highest likelihood +#' @rdname multistart +#' @export +which.best <- function(object, ...) +{ + UseMethod("which.best", object) +} + +#' @rdname multistart +#' @export +which.best.default <- function(object, ...) +{ + llfunc <- function(object) { + ret <- try(logLik(object)) + if (inherits(ret, "try-error")) return(NA) + else return(ret) + } + ll <- sapply(object, llfunc) + return(which.max(ll)) +} diff --git a/R/parhist.R b/R/parhist.R index 10730873..5d498664 100644 --- a/R/parhist.R +++ b/R/parhist.R @@ -1,12 +1,15 @@ #' Plot parameter distributions from multistart objects #' -#' Produces a boxplot with all parameters from the multiple runs, divided by -#' using their medians as in the paper by Duchesne et al. (2021). +#' Produces a boxplot with all parameters from the multiple runs, scaled +#' either by the parameters of the run with the highest likelihood, +#' or by their medians as proposed in the paper by Duchesne et al. (2021). #' #' @param object The [multistart] object -#' @param \dots Passed to [boxplot] +#' @param scale By default, scale parameters using the best available fit. +#' If 'median', parameters are scaled using the median parameters from all fits. #' @param main Title of the plot #' @param lpos Positioning of the legend. +#' @param \dots Passed to [boxplot] #' @references Duchesne R, Guillemin A, Gandrillon O, Crauste F. Practical #' identifiability in the frame of nonlinear mixed effects models: the example #' of the in vitro erythropoiesis. BMC Bioinformatics. 2021 Oct 4;22(1):478. @@ -14,7 +17,9 @@ #' @seealso [multistart] #' @importFrom stats median #' @export -parhist <- function(object, lpos = "bottomleft", main = "", ...) { +parhist <- function(object, scale = c("best", "median"), + lpos = "bottomleft", main = "", ...) +{ oldpar <- par(no.readonly = TRUE) on.exit(par(oldpar, no.readonly = TRUE)) @@ -48,23 +53,34 @@ parhist <- function(object, lpos = "bottomleft", main = "", ...) { colnames(all_parms)[1:length(degparm_names)] <- degparm_names } - median_parms <- apply(all_parms, 2, median) - start_scaled_parms <- rep(NA_real_, length(orig_parms)) - names(start_scaled_parms) <- names(orig_parms) + scale <- match.arg(scale) + parm_scale <- switch(scale, + best = all_parms[which.best(object), ], + median = apply(all_parms, 2, median) + ) - orig_scaled_parms <- orig_parms / median_parms - all_scaled_parms <- t(apply(all_parms, 1, function(x) x / median_parms)) - start_scaled_parms[names(start_parms)] <- - start_parms / median_parms[names(start_parms)] + # Boxplots of all scaled parameters + all_scaled_parms <- t(apply(all_parms, 1, function(x) x / parm_scale)) boxplot(all_scaled_parms, log = "y", main = main, , ylab = "Normalised parameters", ...) - points(orig_scaled_parms, col = 2, cex = 2) + # Show starting parameters + start_scaled_parms <- rep(NA_real_, length(orig_parms)) + names(start_scaled_parms) <- names(orig_parms) + start_scaled_parms[names(start_parms)] <- + start_parms / parm_scale[names(start_parms)] points(start_scaled_parms, col = 3, cex = 3) + + # Show parameters of original run + orig_scaled_parms <- orig_parms / parm_scale + points(orig_scaled_parms, col = 2, cex = 2) + + abline(h = 1, lty = 2) + legend(lpos, inset = c(0.05, 0.05), bty = "n", pch = 1, col = 3:1, lty = c(NA, NA, 1), legend = c( "Starting parameters", - "Converged parameters", + "Original run", "Multistart runs")) } diff --git a/log/test.log b/log/test.log index 942ed50d..6cd9e6a8 100644 --- a/log/test.log +++ b/log/test.log @@ -5,18 +5,18 @@ ✔ | 5 | Calculation of Akaike weights ✔ | 3 | Export dataset for reading into CAKE ✔ | 12 | Confidence intervals and p-values [1.0s] -✔ | 1 12 | Dimethenamid data from 2018 [31.6s] +✔ | 1 12 | Dimethenamid data from 2018 [31.1s] ──────────────────────────────────────────────────────────────────────────────── Skip (test_dmta.R:98:3): Different backends get consistent results for SFO-SFO3+, dimethenamid data Reason: Fitting this ODE model with saemix takes about 15 minutes on my system ──────────────────────────────────────────────────────────────────────────────── -✔ | 14 | Error model fitting [5.0s] +✔ | 14 | Error model fitting [4.9s] ✔ | 5 | Time step normalisation ✔ | 4 | Calculation of FOCUS chi2 error levels [0.6s] ✔ | 14 | Results for FOCUS D established in expertise for UBA (Ranke 2014) [0.8s] -✔ | 4 | Test fitting the decline of metabolites from their maximum [0.3s] +✔ | 4 | Test fitting the decline of metabolites from their maximum [0.4s] ✔ | 1 | Fitting the logistic model [0.2s] -✔ | 7 | Batch fitting and diagnosing hierarchical kinetic models [14.5s] +✔ | 7 | Batch fitting and diagnosing hierarchical kinetic models [14.4s] ✔ | 1 12 | Nonlinear mixed-effects models [0.3s] ──────────────────────────────────────────────────────────────────────────────── Skip (test_mixed.R:68:3): saemix results are reproducible for biphasic fits @@ -26,23 +26,23 @@ Reason: Fitting with saemix takes around 10 minutes when using deSolve ✔ | 10 | Special cases of mkinfit calls [0.4s] ✔ | 3 | mkinfit features [0.7s] ✔ | 8 | mkinmod model generation and printing [0.2s] -✔ | 3 | Model predictions with mkinpredict [0.3s] -✔ | 16 | Evaluations according to 2015 NAFTA guidance [1.7s] -✔ | 9 | Nonlinear mixed-effects models with nlme [8.6s] -✔ | 16 | Plotting [9.7s] +✔ | 3 | Model predictions with mkinpredict [0.4s] +✔ | 16 | Evaluations according to 2015 NAFTA guidance [1.8s] +✔ | 9 | Nonlinear mixed-effects models with nlme [8.5s] +✔ | 16 | Plotting [9.8s] ✔ | 4 | Residuals extracted from mkinfit models -✔ | 35 | saemix parent models [191.8s] +✔ | 35 | saemix parent models [189.7s] ✔ | 2 | Complex test case from Schaefer et al. (2007) Piacenza paper [1.4s] ✔ | 11 | Processing of residue series -✔ | 7 | Fitting the SFORB model [3.8s] +✔ | 7 | Fitting the SFORB model [3.6s] ✔ | 1 | Summaries of old mkinfit objects ✔ | 5 | Summary [0.2s] -✔ | 4 | Results for synthetic data established in expertise for UBA (Ranke 2014) [2.2s] +✔ | 4 | Results for synthetic data established in expertise for UBA (Ranke 2014) [2.1s] ✔ | 9 | Hypothesis tests [7.8s] -✔ | 4 | Calculation of maximum time weighted average concentrations (TWAs) [2.2s] +✔ | 4 | Calculation of maximum time weighted average concentrations (TWAs) [2.1s] ══ Results ═════════════════════════════════════════════════════════════════════ -Duration: 288.9 s +Duration: 286.0 s ── Skipped tests ────────────────────────────────────────────────────────────── • Fitting this ODE model with saemix takes about 15 minutes on my system (1) diff --git a/man/multistart.Rd b/man/multistart.Rd index 78ff4614..d20c0465 100644 --- a/man/multistart.Rd +++ b/man/multistart.Rd @@ -4,6 +4,10 @@ \alias{multistart} \alias{multistart.saem.mmkin} \alias{print.multistart} +\alias{best} +\alias{best.default} +\alias{which.best} +\alias{which.best.default} \title{Perform a hierarchical model fit with multiple starting values} \usage{ multistart( @@ -17,6 +21,14 @@ multistart( \method{multistart}{saem.mmkin}(object, n = 50, cores = 1, cluster = NULL, ...) \method{print}{multistart}(x, ...) + +best(object, ...) + +\method{best}{default}(object, ...) + +which.best(object, ...) + +\method{which.best}{default}(object, ...) } \arguments{ \item{object}{The fit object to work with} @@ -36,6 +48,10 @@ for parallel execution.} \value{ A list of \link{saem.mmkin} objects, with class attributes 'multistart.saem.mmkin' and 'multistart'. + +The object with the highest likelihood + +The index of the object with the highest likelihood } \description{ The purpose of this method is to check if a certain algorithm for fitting @@ -69,8 +85,10 @@ f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE) f_saem_full <- saem(f_mmkin) f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) parhist(f_saem_full_multi, lpos = "bottomright") +illparms(f_saem_full) -f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1))) +f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2") +illparms(f_saem_reduced) # On Windows, we need to create a cluster first. When working with # such a cluster, we need to export the mmkin object to the cluster # nodes, as it is referred to when updating the saem object on the nodes. diff --git a/man/parhist.Rd b/man/parhist.Rd index a8319283..67bbadad 100644 --- a/man/parhist.Rd +++ b/man/parhist.Rd @@ -4,11 +4,20 @@ \alias{parhist} \title{Plot parameter distributions from multistart objects} \usage{ -parhist(object, lpos = "bottomleft", main = "", ...) +parhist( + object, + scale = c("best", "median"), + lpos = "bottomleft", + main = "", + ... +) } \arguments{ \item{object}{The \link{multistart} object} +\item{scale}{By default, scale parameters using the best available fit. +If 'median', parameters are scaled using the median parameters from all fits.} + \item{lpos}{Positioning of the legend.} \item{main}{Title of the plot} @@ -16,8 +25,9 @@ parhist(object, lpos = "bottomleft", main = "", ...) \item{\dots}{Passed to \link{boxplot}} } \description{ -Produces a boxplot with all parameters from the multiple runs, divided by -using their medians as in the paper by Duchesne et al. (2021). +Produces a boxplot with all parameters from the multiple runs, scaled +either by the parameters of the run with the highest likelihood, +or by their medians as proposed in the paper by Duchesne et al. (2021). } \references{ Duchesne R, Guillemin A, Gandrillon O, Crauste F. Practical -- cgit v1.2.1