Fix the cutoff for likelihood based intervals

The cutoff now matches what is given by Venzon and Moolgavkar (1988). Also, confidence intervals closely match intervals obtained with stats4::confint in the test case where an stats4::mle object is created from the likelihood function in one test case. Static documentation rebuilt by pkgdown
author: Johannes Ranke <jranke@uni-bremen.de> 2019-10-28 13:23:40 +0100
committer: Johannes Ranke <jranke@uni-bremen.de> 2019-10-28 13:23:40 +0100
commit: bd761d879a95872f82e6d8f893634a61e122a938 (patch)
tree: 0b66fa5064eb5bd9e0014de18f433c2692fcbc00
parent: 6e5630a0df7e857697ff2ce4730a5f7f45b67377 (diff)
7 files changed, 105 insertions, 86 deletions
diff --git a/DESCRIPTION b/DESCRIPTION
index 88412fa8..544af139 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -19,7 +19,7 @@ Description: Calculation routines based on the FOCUS Kinetics Report (2006,
   purpose.
 Imports: stats, graphics, methods, deSolve, R6, inline, parallel, numDeriv
 Suggests: knitr, rbenchmark, tikzDevice, testthat, rmarkdown, covr, vdiffr,
-  benchmarkme, tibble
+  benchmarkme, tibble, stats4
 License: GPL
 LazyLoad: yes
 LazyData: yes
diff --git a/R/confint.mkinfit.R b/R/confint.mkinfit.R
index 58a3c8db..1ed689a9 100644
--- a/R/confint.mkinfit.R
+++ b/R/confint.mkinfit.R
@@ -1,10 +1,17 @@
 #' Confidence intervals for parameters of mkinfit objects
 #'
+#' The default method 'profile' is based on the profile likelihood for each
+#' parameter. The method uses two nested optimisations. The speed of the method
+#' could likely be improved by using the method of Venzon and Moolgavkar (1988).
+#'
 #' @param object An \code{\link{mkinfit}} object
 #' @param parm A vector of names of the parameters which are to be given
 #'   confidence intervals. If missing, all parameters are considered.
 #' @param level The confidence level required
 #' @param alpha The allowed error probability, overrides 'level' if specified.
+#' @param cutoff Possibility to specify an alternative cutoff for the difference
+#'   in the log-likelihoods at the confidence boundary. Specifying an explicit
+#'   cutoff value overrides arguments 'level' and 'alpha'
 #' @param method The 'profile' method searches the parameter space for the
 #'   cutoff of the confidence intervals by means of a likelihood ratio test.
 #'   The 'quadratic' method approximates the likelihood function at the
@@ -25,6 +32,9 @@
 #' @importFrom stats qnorm
 #' @references Pawitan Y (2013) In all likelihood - Statistical modelling and
 #'   inference using likelihood. Clarendon Press, Oxford.
+#'   Venzon DJ and Moolgavkar SH (1988) A Method for Computing
+#'   Profile-Likelihood Based Confidence Intervals, Applied Statistics, 37,
+#'   87–94.
 #' @examples
 #' f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
 #' confint(f, method = "quadratic")
@@ -33,7 +43,7 @@
 #' }
 #' @export
 confint.mkinfit <- function(object, parm,
-  level = 0.95, alpha = 1 - level,
+  level = 0.95, alpha = 1 - level, cutoff,
   method = c("profile", "quadratic"),
   transformed = TRUE, backtransform = TRUE,
   distribution = c("student_t", "normal"), quiet = FALSE, ...)
@@ -100,15 +110,17 @@ confint.mkinfit <- function(object, parm,
   }
 
   if (method == "profile") {
-    message("Profiling the likelihood")
+    if (!quiet) message("Profiling the likelihood")
+
     lci <- uci <- rep(NA, p)
     names(lci) <- names(uci) <- return_pnames
 
     profile_pnames <- if(missing(parm)) names(parms(object))
       else parm
 
-    # We do two-sided intervals based on the likelihood ratio
-    cutoff <- 0.5 * qchisq(1 - (alpha / 2), 1)
+    if (missing(cutoff)) {
+      cutoff <- 0.5 * qchisq(1 - alpha, 1)
+    }
 
     all_parms <- parms(object)
 
diff --git a/docs/reference/confint.mkinfit.html b/docs/reference/confint.mkinfit.html
index 64ea0d4c..ec8938c5 100644
--- a/docs/reference/confint.mkinfit.html
+++ b/docs/reference/confint.mkinfit.html
@@ -36,7 +36,9 @@
 
 
 <meta property="og:title" content="Confidence intervals for parameters of mkinfit objects — confint.mkinfit" />
-<meta property="og:description" content="Confidence intervals for parameters of mkinfit objects" />
+<meta property="og:description" content="The default method 'profile' is based on the profile likelihood for each
+parameter. The method uses two nested optimisations. The speed of the method
+could likely be improved by using the method of Venzon and Moolgavkar (1988)." />
 <meta name="twitter:card" content="summary" />
 
 
@@ -133,14 +135,16 @@
     </div>
 
     <div class="ref-description">
-    <p>Confidence intervals for parameters of mkinfit objects</p>
+    <p>The default method 'profile' is based on the profile likelihood for each
+parameter. The method uses two nested optimisations. The speed of the method
+could likely be improved by using the method of Venzon and Moolgavkar (1988).</p>
     </div>
 
     <pre class="usage"><span class='co'># S3 method for mkinfit</span>
 <span class='fu'><a href='https://rdrr.io/r/stats/confint.html'>confint</a></span>(<span class='no'>object</span>, <span class='no'>parm</span>, <span class='kw'>level</span> <span class='kw'>=</span> <span class='fl'>0.95</span>, <span class='kw'>alpha</span> <span class='kw'>=</span> <span class='fl'>1</span> -
-  <span class='no'>level</span>, <span class='kw'>method</span> <span class='kw'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/c.html'>c</a></span>(<span class='st'>"profile"</span>, <span class='st'>"quadratic"</span>), <span class='kw'>transformed</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
-  <span class='kw'>backtransform</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>, <span class='kw'>distribution</span> <span class='kw'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/c.html'>c</a></span>(<span class='st'>"student_t"</span>, <span class='st'>"normal"</span>),
-  <span class='kw'>quiet</span> <span class='kw'>=</span> <span class='fl'>FALSE</span>, <span class='no'>...</span>)</pre>
+  <span class='no'>level</span>, <span class='no'>cutoff</span>, <span class='kw'>method</span> <span class='kw'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/c.html'>c</a></span>(<span class='st'>"profile"</span>, <span class='st'>"quadratic"</span>),
+  <span class='kw'>transformed</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>, <span class='kw'>backtransform</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
+  <span class='kw'>distribution</span> <span class='kw'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/c.html'>c</a></span>(<span class='st'>"student_t"</span>, <span class='st'>"normal"</span>), <span class='kw'>quiet</span> <span class='kw'>=</span> <span class='fl'>FALSE</span>, <span class='no'>...</span>)</pre>
 
     <h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
     <table class="ref-arguments">
@@ -163,6 +167,12 @@ confidence intervals. If missing, all parameters are considered.</p></td>
       <td><p>The allowed error probability, overrides 'level' if specified.</p></td>
     </tr>
     <tr>
+      <th>cutoff</th>
+      <td><p>Possibility to specify an alternative cutoff for the difference
+in the log-likelihoods at the confidence boundary. Specifying an explicit
+cutoff value overrides arguments 'level' and 'alpha'</p></td>
+    </tr>
+    <tr>
       <th>method</th>
       <td><p>The 'profile' method searches the parameter space for the
 cutoff of the confidence intervals by means of a likelihood ratio test.
@@ -204,7 +214,10 @@ the parameter estimate</p></td>
     <h2 class="hasAnchor" id="references"><a class="anchor" href="#references"></a>References</h2>
 
     <p>Pawitan Y (2013) In all likelihood - Statistical modelling and
-  inference using likelihood. Clarendon Press, Oxford.</p>
+  inference using likelihood. Clarendon Press, Oxford.
+  Venzon DJ and Moolgavkar SH (1988) A Method for Computing
+  Profile-Likelihood Based Confidence Intervals, Applied Statistics, 37,
+  87–94.</p>
 
     <h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2>
     <pre class="examples"><div class='input'><span class='no'>f</span> <span class='kw'>&lt;-</span> <span class='fu'><a href='mkinfit.html'>mkinfit</a></span>(<span class='st'>"SFO"</span>, <span class='no'>FOCUS_2006_C</span>, <span class='kw'>quiet</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>)
@@ -213,9 +226,9 @@ the parameter estimate</p></td>
 #&gt; k_parent_sink  0.2109541  0.4440528
 #&gt; sigma          1.9778868  7.3681380</div><div class='input'><span class='co'># \dontrun{</span>
   <span class='fu'><a href='https://rdrr.io/r/stats/confint.html'>confint</a></span>(<span class='no'>f</span>, <span class='kw'>method</span> <span class='kw'>=</span> <span class='st'>"profile"</span>)</div><div class='output co'>#&gt; <span class='message'>Profiling the likelihood</span></div><div class='output co'>#&gt;                     2.5%      97.5%
-#&gt; parent_0      71.3471007 93.9447024
-#&gt; k_parent_sink  0.2030765  0.4491067
-#&gt; sigma          2.9810656  8.8633278</div><div class='input'># }
+#&gt; parent_0      73.0641834 92.1392181
+#&gt; k_parent_sink  0.2170293  0.4235348
+#&gt; sigma          3.1307772  8.0628314</div><div class='input'># }
 </div></pre>
   </div>
   <div class="col-md-3 hidden-xs hidden-sm" id="sidebar">
diff --git a/man/confint.mkinfit.Rd b/man/confint.mkinfit.Rd
index 943904b9..29959e52 100644
--- a/man/confint.mkinfit.Rd
+++ b/man/confint.mkinfit.Rd
@@ -5,9 +5,9 @@
 \title{Confidence intervals for parameters of mkinfit objects}
 \usage{
 \method{confint}{mkinfit}(object, parm, level = 0.95, alpha = 1 -
-  level, method = c("profile", "quadratic"), transformed = TRUE,
-  backtransform = TRUE, distribution = c("student_t", "normal"),
-  quiet = FALSE, ...)
+  level, cutoff, method = c("profile", "quadratic"),
+  transformed = TRUE, backtransform = TRUE,
+  distribution = c("student_t", "normal"), quiet = FALSE, ...)
 }
 \arguments{
 \item{object}{An \code{\link{mkinfit}} object}
@@ -19,6 +19,10 @@ confidence intervals. If missing, all parameters are considered.}
 
 \item{alpha}{The allowed error probability, overrides 'level' if specified.}
 
+\item{cutoff}{Possibility to specify an alternative cutoff for the difference
+in the log-likelihoods at the confidence boundary. Specifying an explicit
+cutoff value overrides arguments 'level' and 'alpha'}
+
 \item{method}{The 'profile' method searches the parameter space for the
 cutoff of the confidence intervals by means of a likelihood ratio test.
 The 'quadratic' method approximates the likelihood function at the
@@ -45,7 +49,9 @@ A matrix with columns giving lower and upper confidence limits for
   each parameter.
 }
 \description{
-Confidence intervals for parameters of mkinfit objects
+The default method 'profile' is based on the profile likelihood for each
+parameter. The method uses two nested optimisations. The speed of the method
+could likely be improved by using the method of Venzon and Moolgavkar (1988).
 }
 \examples{
 f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
@@ -57,4 +63,7 @@ confint(f, method = "quadratic")
 \references{
 Pawitan Y (2013) In all likelihood - Statistical modelling and
   inference using likelihood. Clarendon Press, Oxford.
+  Venzon DJ and Moolgavkar SH (1988) A Method for Computing
+  Profile-Likelihood Based Confidence Intervals, Applied Statistics, 37,
+  87–94.
 }
diff --git a/test.log b/test.log
index bc3b3c8c..da9af79e 100644
--- a/test.log
+++ b/test.log
@@ -2,10 +2,10 @@ Loading mkin
 Testing mkin
 ✔ |  OK F W S | Context
 ✔ |   2       | Export dataset for reading into CAKE
-✔ |   9       | Confidence intervals and p-values [2.3 s]
-✔ |  10       | Error model fitting [37.2 s]
+✔ |  10       | Confidence intervals and p-values [12.8 s]
+✔ |  10       | Error model fitting [37.1 s]
 ✔ |   5       | Calculation of FOCUS chi2 error levels [3.5 s]
-✔ |  13       | Results for FOCUS D established in expertise for UBA (Ranke 2014) [3.3 s]
+✔ |  13       | Results for FOCUS D established in expertise for UBA (Ranke 2014) [3.4 s]
 ✔ |   6       | Test fitting the decline of metabolites from their maximum [0.9 s]
 ✔ |   1       | Fitting the logistic model [0.9 s]
 ✔ |   1       | Test dataset class mkinds used in gmkin
@@ -13,19 +13,19 @@ Testing mkin
 ✔ |   9       | mkinmod model generation and printing [0.2 s]
 ✔ |   3       | Model predictions with mkinpredict [0.3 s]
 ✔ |  16       | Evaluations according to 2015 NAFTA guidance [4.0 s]
-✔ |   4       | Calculation of maximum time weighted average concentrations (TWAs) [2.3 s]
+✔ |   4       | Calculation of maximum time weighted average concentrations (TWAs) [2.2 s]
 ✔ |   3       | Summary
 ✔ |  11       | Plotting [0.6 s]
 ✔ |   3       | AIC calculation
-✔ |   2       | Complex test case from Schaefer et al. (2007) Piacenza paper [5.2 s]
+✔ |   2       | Complex test case from Schaefer et al. (2007) Piacenza paper [5.3 s]
 ✔ |   4       | Fitting the SFORB model [1.7 s]
 ✔ |   1       | Summaries of old mkinfit objects
-✔ |   4       | Results for synthetic data established in expertise for UBA (Ranke 2014) [7.0 s]
+✔ |   4       | Results for synthetic data established in expertise for UBA (Ranke 2014) [7.2 s]
 
 ══ Results ═════════════════════════════════════════════════════════════════════
-Duration: 72.1 s
+Duration: 84.5 s
 
-OK:       119
+OK:       120
 Failed:   0
 Warnings: 0
 Skipped:  0
diff --git a/tests/testthat/FOCUS_2006_D.csf b/tests/testthat/FOCUS_2006_D.csf
index 22a4b125..c9da7d61 100644
--- a/tests/testthat/FOCUS_2006_D.csf
+++ b/tests/testthat/FOCUS_2006_D.csf
@@ -5,7 +5,7 @@ Description:
 MeasurementUnits: % AR
 TimeUnits: days
 Comments: Created using mkin::CAKE_export
-Date: 2019-10-26
+Date: 2019-10-28
 Optimiser: IRLS
 
 [Data]
diff --git a/tests/testthat/test_confidence.R b/tests/testthat/test_confidence.R
index 13652436..5f76c344 100644
--- a/tests/testthat/test_confidence.R
+++ b/tests/testthat/test_confidence.R
@@ -1,3 +1,25 @@
+# We set up some models and fits with nls for comparisons
+SFO_trans <- function(t, parent_0, log_k_parent_sink) {
+  parent_0 * exp(- exp(log_k_parent_sink) * t)
+}
+SFO_notrans <- function(t, parent_0, k_parent_sink) {
+  parent_0 * exp(- k_parent_sink * t)
+}
+f_1_nls_trans <- nls(value ~ SFO_trans(time, parent_0, log_k_parent_sink),
+  data = FOCUS_2006_A,
+  start = list(parent_0 = 100, log_k_parent_sink = log(0.1)))
+f_1_nls_notrans <- nls(value ~ SFO_notrans(time, parent_0, k_parent_sink),
+  data = FOCUS_2006_A,
+  start = list(parent_0 = 100, k_parent_sink = 0.1))
+
+f_1_mkin_OLS <- mkinfit("SFO", FOCUS_2006_A, quiet = TRUE)
+f_1_mkin_OLS_notrans <- mkinfit("SFO", FOCUS_2006_A, quiet = TRUE,
+  transform_rates = FALSE)
+
+
+f_2_mkin <- mkinfit("DFOP", FOCUS_2006_C, quiet = TRUE)
+f_2_nls <- nls(value ~ SSbiexp(time, A1, lrc1, A2, lrc2), data = FOCUS_2006_C)
+
 context("Confidence intervals and p-values")
 
 test_that("The confint method 'quadratic' is consistent with the summary", {
@@ -20,24 +42,7 @@ test_that("The confint method 'quadratic' is consistent with the summary", {
 
 })
 
-test_that("Quadratic confidence intervals for rate constants are comparable to confint.nls", {
-
-  SFO_trans <- function(t, parent_0, log_k_parent_sink) {
-    parent_0 * exp(- exp(log_k_parent_sink) * t)
-  }
-  SFO_notrans <- function(t, parent_0, k_parent_sink) {
-    parent_0 * exp(- k_parent_sink * t)
-  }
-  f_1_nls_trans <- nls(value ~ SFO_trans(time, parent_0, log_k_parent_sink),
-    data = FOCUS_2006_A,
-    start = list(parent_0 = 100, log_k_parent_sink = log(0.1)))
-  f_1_nls_notrans <- nls(value ~ SFO_notrans(time, parent_0, k_parent_sink),
-    data = FOCUS_2006_A,
-    start = list(parent_0 = 100, k_parent_sink = 0.1))
-
-  f_1_mkin_OLS <- mkinfit("SFO", FOCUS_2006_A, quiet = TRUE)
-  f_1_mkin_OLS_notrans <- mkinfit("SFO", FOCUS_2006_A, quiet = TRUE,
-    transform_rates = FALSE)
+test_that("Quadratic confidence intervals for rate constants are comparable to values in summary.nls", {
 
   # Check fitted parameter values
   expect_equivalent(
@@ -70,8 +75,6 @@ test_that("Quadratic confidence intervals for rate constants are comparable to c
   expect_equivalent(se_nls_notrans[2] / se_mkin_notrans[2], sqrt(8/5), tolerance = 0.01)
 
   # Another case:
-  f_2_mkin <- mkinfit("DFOP", FOCUS_2006_C, quiet = TRUE)
-  f_2_nls <- nls(value ~ SSbiexp(time, A1, lrc1, A2, lrc2), data = FOCUS_2006_C)
   se_mkin_2 <- summary(f_2_mkin)$par[1:4, "Std. Error"]
   se_nls_2 <- summary(f_2_nls)$coefficients[, "Std. Error"]
   # Here we the ratio of standard errors can be explained by the same
@@ -82,42 +85,24 @@ test_that("Quadratic confidence intervals for rate constants are comparable to c
     tolerance = 0.03)
 })
 
-#test_that("Likelihood profile based confidence intervals work", {
-
-#   f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
-#   f <- fits[["SFO", "FOCUS_C"]]
-#   f_notrans <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE, transform_rates = FALSE)
-#   pf <- parms(f)
-#   f_nll <- function(parent_0, k_parent_sink, sigma) {
-#     - f$ll(c(parent_0 = as.numeric(parent_0),
-#         k_parent_sink = as.numeric(k_parent_sink),
-#         sigma = as.numeric(sigma)))
-#   }
-#   f_nll(parent_0 = 100, k_parent_sink = 0.3, sigma = 4.7)
-#   f_nll(pf[1], pf[2], pf[3])
-#   f_mle <- stats4::mle(f_nll, start = as.list(parms(f)), nobs = nrow(FOCUS_2006_C))
-#   f_mle <- stats4::mle(f_nll, start = as.list(parms(f)), nobs = 17L)
-#   logLik(f_mle)
-#   stats4::confint(f_mle, nobs = nrow(FOCUS_2006_C))
-# 
-#     ci_mkin_1 <- confint(f,
-#       method = "quadratic", backtransform = FALSE,
-#       transformed = TRUE)
-#     ci_maxLik_1 <- maxLik::confint.maxLik(f_maxLik)
-# 
-#     f_tc_2_maxLik <- maxLik::maxLik(f_tc_2$ll,
-#       start = f_tc_2$par)
-#     ci_tc_2 <- confint(f_tc_2, method = "quadratic",
-#         backtransform = FALSE, transformed = TRUE,
-#         distribution = "normal")
-#     ci_tc_2_maxLik <- confint(f_tc_2_maxLik)
-#     rel_diff_ci_tc_2 <- (ci_tc_2 - ci_tc_2_maxLik)/ci_tc_2_maxLik
-#     # The ilr transformed variable appear to misbehave in the numeric differentiation
-#     expect_equivalent(
-#       rel_diff_ci_tc_2[c(2, 3, 4, 6, 7, 9, 10), ], rep(0, 14),
-#       tolerance = 0.05)
-# 
-#     ci_tc_2[, 1]
-#       ci_tc_2_maxLik[, 1]
-
-#})
+test_that("Likelihood profile based confidence intervals work", {
+   f <- fits[["SFO", "FOCUS_C"]]
+
+   # negative log-likelihood for use with mle
+   f_nll <- function(parent_0, k_parent_sink, sigma) {
+     - f$ll(c(parent_0 = as.numeric(parent_0),
+         k_parent_sink = as.numeric(k_parent_sink),
+         sigma = as.numeric(sigma)))
+   }
+   f_mle <- stats4::mle(f_nll, start = as.list(parms(f)), nobs = nrow(FOCUS_2006_C))
+
+   ci_mkin_1_p_0.95 <- confint(f, method = "profile", level = 0.95, quiet = TRUE)
+
+   # Magically, we get very similar boundaries as stats4::mle
+   # (we need to capture the output to avoid printing this while testing as
+   # stats4::confint uses cat() for its message, instead of message(), so
+   # suppressMessage() has no effect)
+   msg <- capture.output(ci_mle_1_0.95 <- stats4::confint(f_mle, level = 0.95))
+   rel_diff_ci <- (ci_mle_1_0.95 - ci_mkin_1_p_0.95)/ci_mle_1_0.95
+   expect_equivalent(as.numeric(rel_diff_ci), rep(0, 6), tolerance = 1e-4)
+})
author	Johannes Ranke <jranke@uni-bremen.de>	2019-10-28 13:23:40 +0100
committer	Johannes Ranke <jranke@uni-bremen.de>	2019-10-28 13:23:40 +0100
commit	bd761d879a95872f82e6d8f893634a61e122a938 (patch)
tree	0b66fa5064eb5bd9e0014de18f433c2692fcbc00
parent	6e5630a0df7e857697ff2ce4730a5f7f45b67377 (diff)