From fd205e13061de8abc595d266f3b0c7650773d442 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Fri, 16 Sep 2022 21:06:54 +0200 Subject: Improve multistart documentation, bugfix - Split out llhist and parhist documentation - Add example code for multistart - Create a multistart vignette, because the example code fails when run by pkgdown - Fix multistart for the case of mkin transformations in the saem fit --- R/llhist.R | 30 +++++++++++++++++++ R/multistart.R | 91 +++++++++++++++++++++++----------------------------------- R/parhist.R | 52 +++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 55 deletions(-) create mode 100644 R/llhist.R create mode 100644 R/parhist.R (limited to 'R') diff --git a/R/llhist.R b/R/llhist.R new file mode 100644 index 00000000..6a6d93cd --- /dev/null +++ b/R/llhist.R @@ -0,0 +1,30 @@ +#' Plot the distribution of log likelihoods from multistart objects +#' +#' Produces a histogram of log-likelihoods, and an overlayed kernel density +#' estimate. In addition, the likelihood of the original fit is shown as +#' a red vertical line. +#' +#' @param object The [multistart] object +#' @param breaks Passed to [hist] +#' @param lpos Positioning of the legend. +#' @param main Title of the plot +#' @param \dots Passed to [hist] +#' @seealso [multistart] +#' @importFrom KernSmooth bkde +#' @export +llhist <- function(object, breaks = "Sturges", lpos = "topleft", main = "", ...) { + ll <- sapply(object, logLik) + kde <- KernSmooth::bkde(ll) + h <- hist(ll, freq = TRUE, + xlim = range(kde$x), + xlab = "", main = main, + ylab = "Frequency of log likelihoods", breaks = breaks, ...) + + freq_factor <- h$counts[1] / h$density[1] + lines(kde$x, freq_factor * kde$y) + abline(v = logLik(attr(object, "orig")), col = 2) + legend(lpos, inset = c(0.05, 0.05), bty = "n", + lty = 1, col = c(2, 1), + legend = c("original log likelihood", + "kernel density estimate")) +} diff --git a/R/multistart.R b/R/multistart.R index 52f279f0..6fd98522 100644 --- a/R/multistart.R +++ b/R/multistart.R @@ -10,24 +10,49 @@ #' Currently, parallel execution of the fits is only supported using #' [parallel::mclapply], i.e. not available on Windows. #' +#' In case the online version of this help page contains error messages +#' in the example code and no plots, this is due to the multistart method +#' not working when called by pkgdown. Please refer to the +#' [online vignette](https://pkgdown.jrwb.de/mkin/dev/articles/web_only/multistart.html) +#' in this case. +#' #' @param object The fit object to work with #' @param n How many different combinations of starting parameters should be #' used? #' @param cores How many fits should be run in parallel? -#' @param \dots Passed to the update function, or to the basic plotting -#' function in the case of the graphical functions. +#' @param \dots Passed to the update function. #' @param x The multistart object to print -#' @param breaks Passed to [hist] -#' @param main title of the plot -#' @param lpos Positioning of the legend. #' @return A list of [saem.mmkin] objects, with class attributes #' 'multistart.saem.mmkin' and 'multistart'. +#' @seealso [parhist], [llhist] #' #' @references Duchesne R, Guillemin A, Gandrillon O, Crauste F. Practical #' identifiability in the frame of nonlinear mixed effects models: the example #' of the in vitro erythropoiesis. BMC Bioinformatics. 2021 Oct 4;22(1):478. #' doi: 10.1186/s12859-021-04373-4. #' @export +#' @examples +#' \dontrun{ +#' library(mkin) +#' dmta_ds <- lapply(1:7, function(i) { +#' ds_i <- dimethenamid_2018$ds[[i]]$data +#' ds_i[ds_i$name == "DMTAP", "name"] <- "DMTA" +#' ds_i$time <- ds_i$time * dimethenamid_2018$f_time_norm[i] +#' ds_i +#' }) +#' names(dmta_ds) <- sapply(dimethenamid_2018$ds, function(ds) ds$title) +#' dmta_ds[["Elliot"]] <- rbind(dmta_ds[["Elliot 1"]], dmta_ds[["Elliot 2"]]) +#' dmta_ds[["Elliot 1"]] <- dmta_ds[["Elliot 2"]] <- NULL +#' +#' f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE) +#' f_saem_full <- saem(f_mmkin) +#' f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) +#' parhist(f_saem_full_multi, lpos = "bottomright") +#' +#' f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1))) +#' f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16) +#' parhist(f_saem_reduced_multi, lpos = "bottomright") +#' } multistart <- function(object, n = 50, cores = 1, ...) { UseMethod("multistart", object) @@ -36,10 +61,13 @@ multistart <- function(object, n = 50, cores = 1, ...) #' @rdname multistart #' @export multistart.saem.mmkin <- function(object, n = 50, cores = 1, ...) { + + mmkin_parms <- parms(object$mmkin, errparms = FALSE, + transformed = object$transformations == "mkin") start_parms <- apply( - parms(object$mmkin, errparms = FALSE), 1, - function(x) stats::runif(n, min(x), max(x)) - ) + mmkin_parms, 1, + function(x) stats::runif(n, min(x), max(x))) + if (n == 1) dim(start_parms) <- c(1, length(start_parms)) res <- parallel::mclapply(1:n, function(x) { update(object, degparms_start = start_parms[x, ], ...) @@ -62,50 +90,3 @@ print.multistart <- function(x, ...) { parms.multistart <- function(object, ...) { t(sapply(object, parms)) } - -#' @rdname multistart -#' @importFrom stats median -#' @export -parhist <- function(object, lpos = "topleft", main = "", ...) { - orig <- attr(object, "orig") - orig_parms <- parms(orig) - start_parms <- orig$mean_dp_start - all_parms <- parms(object) - median_parms <- apply(all_parms, 2, median) - all_scaled_parms <- t(apply(all_parms, 1, function(x) x / median_parms)) - orig_scaled_parms <- orig_parms / median_parms - start_scaled_parms <- rep(NA_real_, length(orig_parms)) - names(start_scaled_parms) <- names(orig_parms) - start_scaled_parms[names(start_parms)] <- - start_parms / median_parms[names(start_parms)] - - boxplot(all_scaled_parms, log = "y", main = main, ...) - points(orig_scaled_parms, col = 2, cex = 2) - points(start_scaled_parms, col = 3, cex = 3) - legend(lpos, inset = c(0.05, 0.05), bty = "n", - pch = 1, col = 3:1, lty = c(NA, NA, 1), - legend = c( - "Starting parameters", - "Converged parameters", - "Multistart runs")) -} - -#' @rdname multistart -#' @importFrom KernSmooth bkde -#' @export -llhist <- function(object, breaks = "Sturges", lpos = "topleft", main = "", ...) { - ll <- sapply(object, logLik) - kde <- KernSmooth::bkde(ll) - h <- hist(ll, freq = TRUE, - xlim = range(kde$x), - xlab = "", main = main, - ylab = "Frequency of log likelihoods", breaks = breaks, ...) - - freq_factor <- h$counts[1] / h$density[1] - lines(kde$x, freq_factor * kde$y) - abline(v = logLik(attr(object, "orig")), col = 2) - legend(lpos, inset = c(0.05, 0.05), bty = "n", - lty = 1, col = c(2, 1), - legend = c("original log likelihood", - "kernel density estimate")) -} diff --git a/R/parhist.R b/R/parhist.R new file mode 100644 index 00000000..62f67249 --- /dev/null +++ b/R/parhist.R @@ -0,0 +1,52 @@ +#' Plot parameter distributions from multistart objects +#' +#' Produces a boxplot with all parameters from the multiple runs, scaled using +#' their medians. If parameter transformations were done by mkin (default in +#' [saem]), then the parameters found by saem are on the transformed scale, and +#' scaling is simply done by subtracting the median. If parameter +#' transformations were done by saemix, scaling is done by dividing by the +#' median, as in the paper by Duchesne et al. (2021). +#' +#' @param object The [multistart] object +#' @param \dots Passed to [boxplot] +#' @param main Title of the plot +#' @param lpos Positioning of the legend. +#' @references Duchesne R, Guillemin A, Gandrillon O, Crauste F. Practical +#' identifiability in the frame of nonlinear mixed effects models: the example +#' of the in vitro erythropoiesis. BMC Bioinformatics. 2021 Oct 4;22(1):478. +#' doi: 10.1186/s12859-021-04373-4. +#' @seealso [multistart] +#' @importFrom stats median +#' @export +parhist <- function(object, lpos = "topleft", main = "", ...) { + orig <- attr(object, "orig") + orig_parms <- parms(orig) + start_parms <- orig$mean_dp_start + all_parms <- parms(object) + median_parms <- apply(all_parms, 2, median) + start_scaled_parms <- rep(NA_real_, length(orig_parms)) + names(start_scaled_parms) <- names(orig_parms) + + if (orig$transformations == "saemix") { + orig_scaled_parms <- orig_parms / median_parms + all_scaled_parms <- t(apply(all_parms, 1, function(x) x / median_parms)) + start_scaled_parms[names(start_parms)] <- + start_parms / median_parms[names(start_parms)] + boxplot(all_scaled_parms, log = "y", main = main, ...) + } else { + orig_scaled_parms <- orig_parms - median_parms + all_scaled_parms <- t(apply(all_parms, 1, function(x) x - median_parms)) + start_scaled_parms[names(start_parms)] <- + start_parms - median_parms[names(start_parms)] + boxplot(all_scaled_parms, main = main, ...) + } + + points(orig_scaled_parms, col = 2, cex = 2) + points(start_scaled_parms, col = 3, cex = 3) + legend(lpos, inset = c(0.05, 0.05), bty = "n", + pch = 1, col = 3:1, lty = c(NA, NA, 1), + legend = c( + "Starting parameters", + "Converged parameters", + "Multistart runs")) +} -- cgit v1.2.1