From 900790b4139dd672c7383a3ed6ad2c1e51d855b9 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Mon, 28 Oct 2019 16:39:14 +0100 Subject: Parallel computation for confidence intervals Only on Linux at the moment. Some more examples in the help page. Remove the distribution argument for the quadratic method --- GNUmakefile | 2 +- R/confint.mkinfit.R | 100 +++++++++++++++++++++++------ docs/reference/confint.mkinfit.html | 123 +++++++++++++++++++++++++++++++++--- man/confint.mkinfit.Rd | 79 +++++++++++++++++++++-- 4 files changed, 268 insertions(+), 36 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index a98129e0..496bb252 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -97,7 +97,7 @@ vignettes/web_only/%.html: vignettes/references.bib vignettes/web_only/%.Rmd articles: vignettes/web_only/FOCUS_Z.html vignettes/web_only/compiled_models.html -pd: +pd: roxygen "$(RBIN)/Rscript" -e "pkgdown::build_site(run_dont_run = TRUE, lazy = TRUE)" git add -A git commit -m 'Static documentation rebuilt by pkgdown' -e diff --git a/R/confint.mkinfit.R b/R/confint.mkinfit.R index 8467a85b..75813360 100644 --- a/R/confint.mkinfit.R +++ b/R/confint.mkinfit.R @@ -22,15 +22,18 @@ #' @param backtransform If we approximate the likelihood in terms of the #' transformed parameters, should we backtransform the parameters with #' their confidence intervals? -#' @param distribution For the quadratic approximation, should we use -#' the student t distribution or assume normal distribution for -#' the parameter estimate -#' @param quiet Should we suppress messages? +#' @param cores The number of cores to be used for multicore processing. This +#' is only used when the \code{cluster} argument is \code{NULL}. On Windows +#' machines, cores > 1 is not supported. +#' @param quiet Should we suppress the message "Profiling the likelihood" #' @return A matrix with columns giving lower and upper confidence limits for #' each parameter. #' @param \dots Not used #' @importFrom stats qnorm -#' @references Pawitan Y (2013) In all likelihood - Statistical modelling and +#' @references +#' Bates DM and Watts GW (1988) Nonlinear regression analysis & its applications +#' +#' Pawitan Y (2013) In all likelihood - Statistical modelling and #' inference using likelihood. Clarendon Press, Oxford. #' #' Venzon DJ and Moolgavkar SH (1988) A Method for Computing @@ -39,15 +42,78 @@ #' @examples #' f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE) #' confint(f, method = "quadratic") +#' #' \dontrun{ -#' confint(f, method = "profile") +#' confint(f, method = "profile") +#' +#' SFO_SFO <- mkinmod(parent = mkinsub("SFO", "m1"), m1 = mkinsub("SFO"), quiet = TRUE) +#' SFO_SFO.ff <- mkinmod(parent = mkinsub("SFO", "m1"), m1 = mkinsub("SFO"), +#' use_of_ff = "max", quiet = TRUE) +#' f_d_1 <- mkinfit(SFO_SFO, subset(FOCUS_2006_D, value != 0), quiet = TRUE) +#' system.time(ci_profile <- confint(f_d_1, cores = 1, quiet = TRUE)) +#' # The following does not save much time, as parent_0 takes up most of the time +#' # system.time(ci_profile <- confint(f_d_1, cores = 5)) +#' # system.time(ci_profile <- confint(f_d_1, +#' # c("k_parent_sink", "k_parent_m1", "k_m1_sink", "sigma"), cores = 1)) +#' # If we exclude parent_0 (the confidence of which is often of minor interest), we get a nice +#' # performance improvement from about 30 seconds to about 12 seconds +#' # system.time(ci_profile_no_parent_0 <- confint(f_d_1, c("k_parent_sink", "k_parent_m1", "k_m1_sink", "sigma"), cores = 4)) +#' ci_profile +#' ci_quadratic_transformed <- confint(f_d_1, method = "quadratic") +#' ci_quadratic_transformed +#' ci_quadratic_untransformed <- confint(f_d_1, method = "quadratic", transformed = FALSE) +#' ci_quadratic_untransformed +#' # Against the expectation based on Bates and Watts (1988), the confidence +#' # intervals based on the internal parameter transformation are less +#' # congruent with the likelihood based intervals. Note the superiority of the +#' # interval based on the untransformed fit for k_m1_sink +#' rel_diffs_transformed <- abs((ci_quadratic_transformed - ci_profile)/ci_profile) +#' rel_diffs_untransformed <- abs((ci_quadratic_untransformed - ci_profile)/ci_profile) +#' rel_diffs_transformed +#' rel_diffs_untransformed +#' +#' # Set the number of cores for further examples +#' if (identical(Sys.getenv("NOT_CRAN"), "true")) { +#' n_cores <- parallel::detectCores() - 1 +#' } else { +#' n_cores <- 1 +#' } +#' if (Sys.getenv("TRAVIS") != "") n_cores = 1 +#' if (Sys.info()["sysname"] == "Windows") n_cores = 1 +#' +#' # Investigate a case with formation fractions +#' f_d_2 <- mkinfit(SFO_SFO.ff, subset(FOCUS_2006_D, value != 0), quiet = TRUE) +#' ci_profile_ff <- confint(f_d_2, cores = n_cores) +#' ci_profile_ff +#' ci_quadratic_transformed_ff <- confint(f_d_2, method = "quadratic") +#' ci_quadratic_transformed_ff +#' ci_quadratic_untransformed_ff <- confint(f_d_2, method = "quadratic", transformed = FALSE) +#' ci_quadratic_untransformed_ff +#' rel_diffs_transformed_ff <- abs((ci_quadratic_transformed_ff - ci_profile_ff)/ci_profile_ff) +#' rel_diffs_untransformed_ff <- abs((ci_quadratic_untransformed_ff - ci_profile_ff)/ci_profile_ff) +#' # While the confidence interval for the parent rate constant is closer to +#' # the profile based interval when using the internal parameter +#' # transformation, the intervals for the other parameters are 'better +#' # without internal parameter transformation. +#' rel_diffs_transformed_ff +#' rel_diffs_untransformed_ff +#' +#' # The profiling for the following fit does not finish in a reasonable time +#' #m_synth_DFOP_par <- mkinmod(parent = mkinsub("DFOP", c("M1", "M2")), +#' # M1 = mkinsub("SFO"), +#' # M2 = mkinsub("SFO"), +#' # use_of_ff = "max", quiet = TRUE) +#' #DFOP_par_c <- synthetic_data_for_UBA_2014[[12]]$data +#' #f_tc_2 <- mkinfit(m_synth_DFOP_par, DFOP_par_c, error_model = "tc", +#' # error_model_algorithm = "direct", quiet = TRUE) +#' #confint(f_tc_2, "parent_0") #' } #' @export confint.mkinfit <- function(object, parm, level = 0.95, alpha = 1 - level, cutoff, method = c("profile", "quadratic"), transformed = TRUE, backtransform = TRUE, - distribution = c("student_t", "normal"), quiet = FALSE, ...) + cores = round(detectCores()/2), quiet = FALSE, ...) { tparms <- parms(object, transformed = TRUE) bparms <- parms(object, transformed = FALSE) @@ -68,11 +134,7 @@ confint.mkinfit <- function(object, parm, if (method == "quadratic") { - distribution <- match.arg(distribution) - - quantiles <- switch(distribution, - student_t = qt(a, object$df.residual), - normal = qnorm(a)) + quantiles <- qt(a, object$df.residual) covar_pnames <- if (missing(parm)) { if (transformed) tpnames else bpnames @@ -99,7 +161,7 @@ confint.mkinfit <- function(object, parm, ses <- sqrt(diag(covar))[covar_pnames] lci <- covar_parms + quantiles[1] * ses uci <- covar_parms + quantiles[2] * ses - if (backtransform) { + if (transformed & backtransform) { lci_back <- backtransform_odeparms(lci, object$mkinmod, object$transform_rates, object$transform_fractions) lci <- c(lci_back, lci[names(object$errparms)]) @@ -108,6 +170,7 @@ confint.mkinfit <- function(object, parm, uci <- c(uci_back, uci[names(object$errparms)]) } } + ci <- cbind(lower = lci, upper = uci) } if (method == "profile") { @@ -125,8 +188,7 @@ confint.mkinfit <- function(object, parm, all_parms <- parms(object) - for (pname in profile_pnames) - { + get_ci <- function(pname) { pnames_free <- setdiff(names(all_parms), pname) profile_ll <- function(x) { @@ -143,12 +205,14 @@ confint.mkinfit <- function(object, parm, (cutoff - (object$logLik - profile_ll(x)))^2 } - lci[pname] <- optimize(cost, lower = 0, upper = all_parms[pname])$minimum - uci[pname] <- optimize(cost, lower = all_parms[pname], upper = 15 * all_parms[pname])$minimum + lci_pname <- optimize(cost, lower = 0, upper = all_parms[pname])$minimum + uci_pname <- optimize(cost, lower = all_parms[pname], + upper = ifelse(grepl("^f_|^g$", pname), 1, 15 * all_parms[pname]))$minimum + return(c(lci_pname, uci_pname)) } + ci <- t(parallel::mcmapply(get_ci, profile_pnames, mc.cores = cores)) } - ci <- cbind(lower = lci, upper = uci) colnames(ci) <- paste0( format(100 * a, trim = TRUE, scientific = FALSE, digits = 3), "%") diff --git a/docs/reference/confint.mkinfit.html b/docs/reference/confint.mkinfit.html index fdbc9a3f..0053894b 100644 --- a/docs/reference/confint.mkinfit.html +++ b/docs/reference/confint.mkinfit.html @@ -144,7 +144,7 @@ could likely be improved by using the method of Venzon and Moolgavkar (1988).

confint(object, parm, level = 0.95, alpha = 1 - level, cutoff, method = c("profile", "quadratic"), transformed = TRUE, backtransform = TRUE, - distribution = c("student_t", "normal"), quiet = FALSE, ...) + cores = round(detectCores()/2), quiet = FALSE, ...)

Arguments

@@ -192,14 +192,14 @@ transformed parameters, should we backtransform the parameters with their confidence intervals?

- - + + - + @@ -213,7 +213,8 @@ the parameter estimate

each parameter.

References

-

Pawitan Y (2013) In all likelihood - Statistical modelling and +

Bates DM and Watts GW (1988) Nonlinear regression analysis & its applications

+

Pawitan Y (2013) In all likelihood - Statistical modelling and inference using likelihood. Clarendon Press, Oxford.

Venzon DJ and Moolgavkar SH (1988) A Method for Computing Profile-Likelihood Based Confidence Intervals, Applied Statistics, 37, @@ -224,11 +225,113 @@ the parameter estimate

confint(f, method="quadratic")
#> 2.5% 97.5% #> parent_0 71.8242430 93.1600766 #> k_parent_sink 0.2109541 0.4440528 -#> sigma 1.9778868 7.3681380
# \dontrun{ - confint(f, method = "profile")
#> Profiling the likelihood
#> 2.5% 97.5% +#> sigma 1.9778868 7.3681380
+# \dontrun{ +confint(f, method = "profile")
#> Profiling the likelihood
#> 2.5% 97.5% #> parent_0 73.0641834 92.1392181 #> k_parent_sink 0.2170293 0.4235348 -#> sigma 3.1307772 8.0628314
# } +#> sigma 3.1307772 8.0628314
+SFO_SFO <- mkinmod(parent = mkinsub("SFO", "m1"), m1 = mkinsub("SFO"), quiet = TRUE) +SFO_SFO.ff <- mkinmod(parent = mkinsub("SFO", "m1"), m1 = mkinsub("SFO"), + use_of_ff = "max", quiet = TRUE) +f_d_1 <- mkinfit(SFO_SFO, subset(FOCUS_2006_D, value != 0), quiet = TRUE) +system.time(ci_profile <- confint(f_d_1, cores = 1, quiet = TRUE))
#> User System verstrichen +#> 51.063 0.000 51.090
# The following does not save much time, as parent_0 takes up most of the time +# system.time(ci_profile <- confint(f_d_1, cores = 5)) +# system.time(ci_profile <- confint(f_d_1, +# c("k_parent_sink", "k_parent_m1", "k_m1_sink", "sigma"), cores = 1)) +# If we exclude parent_0 (the confidence of which is often of minor interest), we get a nice +# performance improvement from about 30 seconds to about 12 seconds +# system.time(ci_profile_no_parent_0 <- confint(f_d_1, c("k_parent_sink", "k_parent_m1", "k_m1_sink", "sigma"), cores = 4)) +ci_profile
#> 2.5% 97.5% +#> parent_0 96.456003650 1.027703e+02 +#> k_parent_sink 0.040762501 5.549764e-02 +#> k_parent_m1 0.046786482 5.500879e-02 +#> k_m1_sink 0.003892605 6.702778e-03 +#> sigma 2.535612399 3.985263e+00
ci_quadratic_transformed <- confint(f_d_1, method = "quadratic") +ci_quadratic_transformed
#> 2.5% 97.5% +#> parent_0 96.403841649 1.027931e+02 +#> k_parent_sink 0.041033378 5.596269e-02 +#> k_parent_m1 0.046777902 5.511931e-02 +#> k_m1_sink 0.004012217 6.897547e-03 +#> sigma 2.396089689 3.854918e+00
ci_quadratic_untransformed <- confint(f_d_1, method = "quadratic", transformed = FALSE) +ci_quadratic_untransformed
#> 2.5% 97.5% +#> parent_0 96.403841653 102.79312450 +#> k_parent_sink 0.040485331 0.05535491 +#> k_parent_m1 0.046611581 0.05494364 +#> k_m1_sink 0.003835483 0.00668582 +#> sigma 2.396089689 3.85491806
# Against the expectation based on Bates and Watts (1988), the confidence +# intervals based on the internal parameter transformation are less +# congruent with the likelihood based intervals. Note the superiority of the +# interval based on the untransformed fit for k_m1_sink +rel_diffs_transformed <- abs((ci_quadratic_transformed - ci_profile)/ci_profile) +rel_diffs_untransformed <- abs((ci_quadratic_untransformed - ci_profile)/ci_profile) +rel_diffs_transformed
#> 2.5% 97.5% +#> parent_0 0.0005407854 0.0002218012 +#> k_parent_sink 0.0066452394 0.0083795930 +#> k_parent_m1 0.0001833903 0.0020092090 +#> k_m1_sink 0.0307278240 0.0290580487 +#> sigma 0.0550252516 0.0327066836
rel_diffs_untransformed
#> 2.5% 97.5% +#> parent_0 0.0005407854 0.0002218011 +#> k_parent_sink 0.0067996407 0.0025717594 +#> k_parent_m1 0.0037382781 0.0011843074 +#> k_m1_sink 0.0146745610 0.0025299672 +#> sigma 0.0550252516 0.0327066836
+# Set the number of cores for further examples +if (identical(Sys.getenv("NOT_CRAN"), "true")) { + n_cores <- parallel::detectCores() - 1 +} else { + n_cores <- 1 +} +if (Sys.getenv("TRAVIS") != "") n_cores = 1 +if (Sys.info()["sysname"] == "Windows") n_cores = 1 + +# Investigate a case with formation fractions +f_d_2 <- mkinfit(SFO_SFO.ff, subset(FOCUS_2006_D, value != 0), quiet = TRUE) +ci_profile_ff <- confint(f_d_2, cores = n_cores)
#> Profiling the likelihood
ci_profile_ff
#> 2.5% 97.5% +#> parent_0 96.456003650 1.027703e+02 +#> k_parent 0.090911032 1.071578e-01 +#> k_m1 0.003892605 6.702778e-03 +#> f_parent_to_m1 0.471328495 5.611550e-01 +#> sigma 2.535612399 3.985263e+00
ci_quadratic_transformed_ff <- confint(f_d_2, method = "quadratic") +ci_quadratic_transformed_ff
#> 2.5% 97.5% +#> parent_0 96.403840123 1.027931e+02 +#> k_parent 0.090823791 1.072543e-01 +#> k_m1 0.004012216 6.897547e-03 +#> f_parent_to_m1 0.469118710 5.595960e-01 +#> sigma 2.396089689 3.854918e+00
ci_quadratic_untransformed_ff <- confint(f_d_2, method = "quadratic", transformed = FALSE) +ci_quadratic_untransformed_ff
#> 2.5% 97.5% +#> parent_0 96.403840057 1.027931e+02 +#> k_parent 0.090491932 1.069035e-01 +#> k_m1 0.003835483 6.685819e-03 +#> f_parent_to_m1 0.469113361 5.598386e-01 +#> sigma 2.396089689 3.854918e+00
rel_diffs_transformed_ff <- abs((ci_quadratic_transformed_ff - ci_profile_ff)/ci_profile_ff) +rel_diffs_untransformed_ff <- abs((ci_quadratic_untransformed_ff - ci_profile_ff)/ci_profile_ff) +# While the confidence interval for the parent rate constant is closer to +# the profile based interval when using the internal parameter +# transformation, the intervals for the other parameters are 'better +# without internal parameter transformation. +rel_diffs_transformed_ff
#> 2.5% 97.5% +#> parent_0 0.0005408012 0.0002217857 +#> k_parent 0.0009596303 0.0009003981 +#> k_m1 0.0307277425 0.0290579163 +#> f_parent_to_m1 0.0046884178 0.0027782643 +#> sigma 0.0550252516 0.0327066836
rel_diffs_untransformed_ff
#> 2.5% 97.5% +#> parent_0 0.0005408019 0.0002217863 +#> k_parent 0.0046099989 0.0023730118 +#> k_m1 0.0146746451 0.0025300990 +#> f_parent_to_m1 0.0046997668 0.0023460293 +#> sigma 0.0550252516 0.0327066836
+# The profiling for the following fit does not finish in a reasonable time +#m_synth_DFOP_par <- mkinmod(parent = mkinsub("DFOP", c("M1", "M2")), +# M1 = mkinsub("SFO"), +# M2 = mkinsub("SFO"), +# use_of_ff = "max", quiet = TRUE) +#DFOP_par_c <- synthetic_data_for_UBA_2014[[12]]$data +#f_tc_2 <- mkinfit(m_synth_DFOP_par, DFOP_par_c, error_model = "tc", +# error_model_algorithm = "direct", quiet = TRUE) +#confint(f_tc_2, "parent_0") +# }
distribution

For the quadratic approximation, should we use -the student t distribution or assume normal distribution for -the parameter estimate

cores

The number of cores to be used for multicore processing. This +is only used when the cluster argument is NULL. On Windows +machines, cores > 1 is not supported.

quiet

Should we suppress messages?

Should we suppress the message "Profiling the likelihood"

...