From 900790b4139dd672c7383a3ed6ad2c1e51d855b9 Mon Sep 17 00:00:00 2001
From: Johannes Ranke
Date: Mon, 28 Oct 2019 16:39:14 +0100
Subject: Parallel computation for confidence intervals
Only on Linux at the moment. Some more examples in the help page.
Remove the distribution argument for the quadratic method
---
GNUmakefile | 2 +-
R/confint.mkinfit.R | 100 +++++++++++++++++++++++------
docs/reference/confint.mkinfit.html | 123 +++++++++++++++++++++++++++++++++---
man/confint.mkinfit.Rd | 79 +++++++++++++++++++++--
4 files changed, 268 insertions(+), 36 deletions(-)
diff --git a/GNUmakefile b/GNUmakefile
index a98129e0..496bb252 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -97,7 +97,7 @@ vignettes/web_only/%.html: vignettes/references.bib vignettes/web_only/%.Rmd
articles: vignettes/web_only/FOCUS_Z.html vignettes/web_only/compiled_models.html
-pd:
+pd: roxygen
"$(RBIN)/Rscript" -e "pkgdown::build_site(run_dont_run = TRUE, lazy = TRUE)"
git add -A
git commit -m 'Static documentation rebuilt by pkgdown' -e
diff --git a/R/confint.mkinfit.R b/R/confint.mkinfit.R
index 8467a85b..75813360 100644
--- a/R/confint.mkinfit.R
+++ b/R/confint.mkinfit.R
@@ -22,15 +22,18 @@
#' @param backtransform If we approximate the likelihood in terms of the
#' transformed parameters, should we backtransform the parameters with
#' their confidence intervals?
-#' @param distribution For the quadratic approximation, should we use
-#' the student t distribution or assume normal distribution for
-#' the parameter estimate
-#' @param quiet Should we suppress messages?
+#' @param cores The number of cores to be used for multicore processing. This
+#' is only used when the \code{cluster} argument is \code{NULL}. On Windows
+#' machines, cores > 1 is not supported.
+#' @param quiet Should we suppress the message "Profiling the likelihood"
#' @return A matrix with columns giving lower and upper confidence limits for
#' each parameter.
#' @param \dots Not used
#' @importFrom stats qnorm
-#' @references Pawitan Y (2013) In all likelihood - Statistical modelling and
+#' @references
+#' Bates DM and Watts GW (1988) Nonlinear regression analysis & its applications
+#'
+#' Pawitan Y (2013) In all likelihood - Statistical modelling and
#' inference using likelihood. Clarendon Press, Oxford.
#'
#' Venzon DJ and Moolgavkar SH (1988) A Method for Computing
@@ -39,15 +42,78 @@
#' @examples
#' f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
#' confint(f, method = "quadratic")
+#'
#' \dontrun{
-#' confint(f, method = "profile")
+#' confint(f, method = "profile")
+#'
+#' SFO_SFO <- mkinmod(parent = mkinsub("SFO", "m1"), m1 = mkinsub("SFO"), quiet = TRUE)
+#' SFO_SFO.ff <- mkinmod(parent = mkinsub("SFO", "m1"), m1 = mkinsub("SFO"),
+#' use_of_ff = "max", quiet = TRUE)
+#' f_d_1 <- mkinfit(SFO_SFO, subset(FOCUS_2006_D, value != 0), quiet = TRUE)
+#' system.time(ci_profile <- confint(f_d_1, cores = 1, quiet = TRUE))
+#' # The following does not save much time, as parent_0 takes up most of the time
+#' # system.time(ci_profile <- confint(f_d_1, cores = 5))
+#' # system.time(ci_profile <- confint(f_d_1,
+#' # c("k_parent_sink", "k_parent_m1", "k_m1_sink", "sigma"), cores = 1))
+#' # If we exclude parent_0 (the confidence of which is often of minor interest), we get a nice
+#' # performance improvement from about 30 seconds to about 12 seconds
+#' # system.time(ci_profile_no_parent_0 <- confint(f_d_1, c("k_parent_sink", "k_parent_m1", "k_m1_sink", "sigma"), cores = 4))
+#' ci_profile
+#' ci_quadratic_transformed <- confint(f_d_1, method = "quadratic")
+#' ci_quadratic_transformed
+#' ci_quadratic_untransformed <- confint(f_d_1, method = "quadratic", transformed = FALSE)
+#' ci_quadratic_untransformed
+#' # Against the expectation based on Bates and Watts (1988), the confidence
+#' # intervals based on the internal parameter transformation are less
+#' # congruent with the likelihood based intervals. Note the superiority of the
+#' # interval based on the untransformed fit for k_m1_sink
+#' rel_diffs_transformed <- abs((ci_quadratic_transformed - ci_profile)/ci_profile)
+#' rel_diffs_untransformed <- abs((ci_quadratic_untransformed - ci_profile)/ci_profile)
+#' rel_diffs_transformed
+#' rel_diffs_untransformed
+#'
+#' # Set the number of cores for further examples
+#' if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+#' n_cores <- parallel::detectCores() - 1
+#' } else {
+#' n_cores <- 1
+#' }
+#' if (Sys.getenv("TRAVIS") != "") n_cores = 1
+#' if (Sys.info()["sysname"] == "Windows") n_cores = 1
+#'
+#' # Investigate a case with formation fractions
+#' f_d_2 <- mkinfit(SFO_SFO.ff, subset(FOCUS_2006_D, value != 0), quiet = TRUE)
+#' ci_profile_ff <- confint(f_d_2, cores = n_cores)
+#' ci_profile_ff
+#' ci_quadratic_transformed_ff <- confint(f_d_2, method = "quadratic")
+#' ci_quadratic_transformed_ff
+#' ci_quadratic_untransformed_ff <- confint(f_d_2, method = "quadratic", transformed = FALSE)
+#' ci_quadratic_untransformed_ff
+#' rel_diffs_transformed_ff <- abs((ci_quadratic_transformed_ff - ci_profile_ff)/ci_profile_ff)
+#' rel_diffs_untransformed_ff <- abs((ci_quadratic_untransformed_ff - ci_profile_ff)/ci_profile_ff)
+#' # While the confidence interval for the parent rate constant is closer to
+#' # the profile based interval when using the internal parameter
+#' # transformation, the intervals for the other parameters are 'better
+#' # without internal parameter transformation.
+#' rel_diffs_transformed_ff
+#' rel_diffs_untransformed_ff
+#'
+#' # The profiling for the following fit does not finish in a reasonable time
+#' #m_synth_DFOP_par <- mkinmod(parent = mkinsub("DFOP", c("M1", "M2")),
+#' # M1 = mkinsub("SFO"),
+#' # M2 = mkinsub("SFO"),
+#' # use_of_ff = "max", quiet = TRUE)
+#' #DFOP_par_c <- synthetic_data_for_UBA_2014[[12]]$data
+#' #f_tc_2 <- mkinfit(m_synth_DFOP_par, DFOP_par_c, error_model = "tc",
+#' # error_model_algorithm = "direct", quiet = TRUE)
+#' #confint(f_tc_2, "parent_0")
#' }
#' @export
confint.mkinfit <- function(object, parm,
level = 0.95, alpha = 1 - level, cutoff,
method = c("profile", "quadratic"),
transformed = TRUE, backtransform = TRUE,
- distribution = c("student_t", "normal"), quiet = FALSE, ...)
+ cores = round(detectCores()/2), quiet = FALSE, ...)
{
tparms <- parms(object, transformed = TRUE)
bparms <- parms(object, transformed = FALSE)
@@ -68,11 +134,7 @@ confint.mkinfit <- function(object, parm,
if (method == "quadratic") {
- distribution <- match.arg(distribution)
-
- quantiles <- switch(distribution,
- student_t = qt(a, object$df.residual),
- normal = qnorm(a))
+ quantiles <- qt(a, object$df.residual)
covar_pnames <- if (missing(parm)) {
if (transformed) tpnames else bpnames
@@ -99,7 +161,7 @@ confint.mkinfit <- function(object, parm,
ses <- sqrt(diag(covar))[covar_pnames]
lci <- covar_parms + quantiles[1] * ses
uci <- covar_parms + quantiles[2] * ses
- if (backtransform) {
+ if (transformed & backtransform) {
lci_back <- backtransform_odeparms(lci,
object$mkinmod, object$transform_rates, object$transform_fractions)
lci <- c(lci_back, lci[names(object$errparms)])
@@ -108,6 +170,7 @@ confint.mkinfit <- function(object, parm,
uci <- c(uci_back, uci[names(object$errparms)])
}
}
+ ci <- cbind(lower = lci, upper = uci)
}
if (method == "profile") {
@@ -125,8 +188,7 @@ confint.mkinfit <- function(object, parm,
all_parms <- parms(object)
- for (pname in profile_pnames)
- {
+ get_ci <- function(pname) {
pnames_free <- setdiff(names(all_parms), pname)
profile_ll <- function(x)
{
@@ -143,12 +205,14 @@ confint.mkinfit <- function(object, parm,
(cutoff - (object$logLik - profile_ll(x)))^2
}
- lci[pname] <- optimize(cost, lower = 0, upper = all_parms[pname])$minimum
- uci[pname] <- optimize(cost, lower = all_parms[pname], upper = 15 * all_parms[pname])$minimum
+ lci_pname <- optimize(cost, lower = 0, upper = all_parms[pname])$minimum
+ uci_pname <- optimize(cost, lower = all_parms[pname],
+ upper = ifelse(grepl("^f_|^g$", pname), 1, 15 * all_parms[pname]))$minimum
+ return(c(lci_pname, uci_pname))
}
+ ci <- t(parallel::mcmapply(get_ci, profile_pnames, mc.cores = cores))
}
- ci <- cbind(lower = lci, upper = uci)
colnames(ci) <- paste0(
format(100 * a, trim = TRUE, scientific = FALSE, digits = 3), "%")
diff --git a/docs/reference/confint.mkinfit.html b/docs/reference/confint.mkinfit.html
index fdbc9a3f..0053894b 100644
--- a/docs/reference/confint.mkinfit.html
+++ b/docs/reference/confint.mkinfit.html
@@ -144,7 +144,7 @@ could likely be improved by using the method of Venzon and Moolgavkar (1988).
confint(object, parm, level = 0.95, alpha = 1 -
level, cutoff, method = c("profile", "quadratic"),
transformed = TRUE, backtransform = TRUE,
- distribution = c("student_t", "normal"), quiet = FALSE, ...)
+ cores = round(detectCores()/2), quiet = FALSE, ...)
Arguments
@@ -192,14 +192,14 @@ transformed parameters, should we backtransform the parameters with
their confidence intervals?
- distribution |
- For the quadratic approximation, should we use
-the student t distribution or assume normal distribution for
-the parameter estimate |
+ cores |
+ The number of cores to be used for multicore processing. This
+is only used when the cluster argument is NULL . On Windows
+machines, cores > 1 is not supported. |
quiet |
- Should we suppress messages? |
+ Should we suppress the message "Profiling the likelihood" |
... |
@@ -213,7 +213,8 @@ the parameter estimate
each parameter.
References
- Pawitan Y (2013) In all likelihood - Statistical modelling and
+
Bates DM and Watts GW (1988) Nonlinear regression analysis & its applications
+Pawitan Y (2013) In all likelihood - Statistical modelling and
inference using likelihood. Clarendon Press, Oxford.
Venzon DJ and Moolgavkar SH (1988) A Method for Computing
Profile-Likelihood Based Confidence Intervals, Applied Statistics, 37,
@@ -224,11 +225,113 @@ the parameter estimate
confint(f, method = "quadratic")#> 2.5% 97.5%
#> parent_0 71.8242430 93.1600766
#> k_parent_sink 0.2109541 0.4440528
-#> sigma 1.9778868 7.3681380
#> Profiling the likelihood
#> 2.5% 97.5%
+#> sigma 1.9778868 7.3681380
#> Profiling the likelihood
#> 2.5% 97.5%
#> parent_0 73.0641834 92.1392181
#> k_parent_sink 0.2170293 0.4235348
-#> sigma 3.1307772 8.0628314
# }
+#> sigma 3.1307772 8.0628314
#> User System verstrichen
+#> 51.063 0.000 51.090
# The following does not save much time, as parent_0 takes up most of the time
+# system.time(ci_profile <- confint(f_d_1, cores = 5))
+# system.time(ci_profile <- confint(f_d_1,
+# c("k_parent_sink", "k_parent_m1", "k_m1_sink", "sigma"), cores = 1))
+# If we exclude parent_0 (the confidence of which is often of minor interest), we get a nice
+# performance improvement from about 30 seconds to about 12 seconds
+# system.time(ci_profile_no_parent_0 <- confint(f_d_1, c("k_parent_sink", "k_parent_m1", "k_m1_sink", "sigma"), cores = 4))
+ci_profile
#> 2.5% 97.5%
+#> parent_0 96.456003650 1.027703e+02
+#> k_parent_sink 0.040762501 5.549764e-02
+#> k_parent_m1 0.046786482 5.500879e-02
+#> k_m1_sink 0.003892605 6.702778e-03
+#> sigma 2.535612399 3.985263e+00
#> 2.5% 97.5%
+#> parent_0 96.403841649 1.027931e+02
+#> k_parent_sink 0.041033378 5.596269e-02
+#> k_parent_m1 0.046777902 5.511931e-02
+#> k_m1_sink 0.004012217 6.897547e-03
+#> sigma 2.396089689 3.854918e+00
#> 2.5% 97.5%
+#> parent_0 96.403841653 102.79312450
+#> k_parent_sink 0.040485331 0.05535491
+#> k_parent_m1 0.046611581 0.05494364
+#> k_m1_sink 0.003835483 0.00668582
+#> sigma 2.396089689 3.85491806
#> 2.5% 97.5%
+#> parent_0 0.0005407854 0.0002218012
+#> k_parent_sink 0.0066452394 0.0083795930
+#> k_parent_m1 0.0001833903 0.0020092090
+#> k_m1_sink 0.0307278240 0.0290580487
+#> sigma 0.0550252516 0.0327066836
rel_diffs_untransformed
#> 2.5% 97.5%
+#> parent_0 0.0005407854 0.0002218011
+#> k_parent_sink 0.0067996407 0.0025717594
+#> k_parent_m1 0.0037382781 0.0011843074
+#> k_m1_sink 0.0146745610 0.0025299672
+#> sigma 0.0550252516 0.0327066836
#> Profiling the likelihood
ci_profile_ff
#> 2.5% 97.5%
+#> parent_0 96.456003650 1.027703e+02
+#> k_parent 0.090911032 1.071578e-01
+#> k_m1 0.003892605 6.702778e-03
+#> f_parent_to_m1 0.471328495 5.611550e-01
+#> sigma 2.535612399 3.985263e+00
#> 2.5% 97.5%
+#> parent_0 96.403840123 1.027931e+02
+#> k_parent 0.090823791 1.072543e-01
+#> k_m1 0.004012216 6.897547e-03
+#> f_parent_to_m1 0.469118710 5.595960e-01
+#> sigma 2.396089689 3.854918e+00
#> 2.5% 97.5%
+#> parent_0 96.403840057 1.027931e+02
+#> k_parent 0.090491932 1.069035e-01
+#> k_m1 0.003835483 6.685819e-03
+#> f_parent_to_m1 0.469113361 5.598386e-01
+#> sigma 2.396089689 3.854918e+00
#> 2.5% 97.5%
+#> parent_0 0.0005408012 0.0002217857
+#> k_parent 0.0009596303 0.0009003981
+#> k_m1 0.0307277425 0.0290579163
+#> f_parent_to_m1 0.0046884178 0.0027782643
+#> sigma 0.0550252516 0.0327066836
rel_diffs_untransformed_ff
#> 2.5% 97.5%
+#> parent_0 0.0005408019 0.0002217863
+#> k_parent 0.0046099989 0.0023730118
+#> k_m1 0.0146746451 0.0025300990
+#> f_parent_to_m1 0.0046997668 0.0023460293
+#> sigma 0.0550252516 0.0327066836
+# The profiling for the following fit does not finish in a reasonable time
+#m_synth_DFOP_par <- mkinmod(parent = mkinsub("DFOP", c("M1", "M2")),
+# M1 = mkinsub("SFO"),
+# M2 = mkinsub("SFO"),
+# use_of_ff = "max", quiet = TRUE)
+#DFOP_par_c <- synthetic_data_for_UBA_2014[[12]]$data
+#f_tc_2 <- mkinfit(m_synth_DFOP_par, DFOP_par_c, error_model = "tc",
+# error_model_algorithm = "direct", quiet = TRUE)
+#confint(f_tc_2, "parent_0")
+# }