author     Johannes Ranke <jranke@uni-bremen.de>  2019-10-26 20:50:09 +0200
committer  Johannes Ranke <jranke@uni-bremen.de>  2019-10-26 20:50:09 +0200
commit     7b7729694363515007193d1c3e29e9b76271abb3 (patch)
tree       3aa572fb56d26d4ad53463e320ee9e75ff4a2321
parent     af2e1540cdad2fd00bb6216a38a754ff748629ad (diff)
parms and confint methods
The confint method can do profile likelihood based confidence intervals!
-rw-r--r--  NAMESPACE                         |   3
-rw-r--r--  NEWS.md                           |   2
-rw-r--r--  R/confint.mkinfit.R               | 139
-rw-r--r--  R/mkinfit.R                       |  49
-rw-r--r--  R/parms.mkinfit.R                 |  24
-rw-r--r--  man/confint.mkinfit.Rd            |  56
-rw-r--r--  man/parms.Rd                      |  27
-rw-r--r--  tests/testthat/setup_script.R     |  33
-rw-r--r--  tests/testthat/test_confidence.R  | 149
9 files changed, 407 insertions, 75 deletions
diff --git a/NAMESPACE b/NAMESPACE
index 72ad9d3a..b4b781ac 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,9 +2,11 @@
S3method("[",mmkin)
S3method(AIC,mmkin)
+S3method(confint,mkinfit)
S3method(logLik,mkinfit)
S3method(mkinpredict,mkinfit)
S3method(mkinpredict,mkinmod)
+S3method(parms,mkinfit)
S3method(plot,mkinfit)
S3method(plot,mmkin)
S3method(plot,nafta)
@@ -45,6 +47,7 @@ export(mkinresplot)
export(mkinsub)
export(mmkin)
export(nafta)
+export(parms)
export(plot_err)
export(plot_res)
export(plot_sep)
diff --git a/NEWS.md b/NEWS.md
index cd6a1f97..4bdae5b9 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,7 @@
# mkin 0.9.49.6 (unreleased)
+- Add 'parms' and 'confint' methods for mkinfit objects. Confidence intervals can be based on the quadratic approximation as in the summary, or on the profile likelihood
+
- Move long-running tests to tests/testthat/slow with a separate test log. They currently take around 7 minutes on my system
- 'mkinfit': Clean the code and return functions to calculate the log-likelihood and the sum of squared residuals
diff --git a/R/confint.mkinfit.R b/R/confint.mkinfit.R
new file mode 100644
index 00000000..887adc9f
--- /dev/null
+++ b/R/confint.mkinfit.R
@@ -0,0 +1,139 @@
+#' Confidence intervals for parameters of mkinfit objects
+#'
+#' @param object An \code{\link{mkinfit}} object
+#' @param parm A vector of names of the parameters which are to be given
+#' confidence intervals. If missing, all parameters are considered.
+#' @param level The confidence level required
+#' @param alpha The allowed error probability, overrides 'level' if specified.
+#' @param method The 'profile' method searches the parameter space for the
+#' cutoff of the confidence intervals by means of a likelihood ratio test.
+#' The 'quadratic' method approximates the likelihood function at the
+#' optimised parameters using the second term of the Taylor expansion, using
+#' a second derivative (hessian) contained in the object.
+#' @param transformed If the quadratic approximation is used, should it be
+#' applied to the likelihood based on the transformed parameters?
+#' @param backtransform If we approximate the likelihood in terms of the
+#' transformed parameters, should we backtransform the parameters with
+#' their confidence intervals?
+#' @param distribution For the quadratic approximation, should we use
+#' the Student t distribution or assume a normal distribution for
+#' the parameter estimate?
+#' @param quiet Should we suppress messages?
+#' @return A matrix with columns giving lower and upper confidence limits for
+#' each parameter.
+#' @references Pawitan Y (2013) In all likelihood - Statistical modelling and
+#' inference using likelihood. Clarendon Press, Oxford.
+#' @examples
+#' f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
+#' confint(f, method = "quadratic")
+#' confint(f, method = "profile")
+#' @export
+confint.mkinfit <- function(object, parm,
+ level = 0.95, alpha = 1 - level,
+ method = c("profile", "quadratic"),
+ transformed = TRUE, backtransform = TRUE,
+ distribution = c("student_t", "normal"), quiet = FALSE, ...)
+{
+ tparms <- parms(object, transformed = TRUE)
+ bparms <- parms(object, transformed = FALSE)
+ tpnames <- names(tparms)
+ bpnames <- names(bparms)
+
+ return_pnames <- if (missing(parm)) {
+ if (backtransform) bpnames else tpnames
+ } else {
+ parm
+ }
+
+ p <- length(return_pnames)
+
+ method <- match.arg(method)
+
+ a <- c(alpha / 2, 1 - (alpha / 2))
+
+ if (method == "quadratic") {
+
+ distribution <- match.arg(distribution)
+
+ quantiles <- switch(distribution,
+ student_t = qt(a, object$df.residual),
+ normal = qnorm(a))
+
+ covar_pnames <- if (missing(parm)) {
+ if (transformed) tpnames else bpnames
+ } else {
+ parm
+ }
+
+ return_parms <- if (backtransform) bparms[return_pnames]
+ else tparms[return_pnames]
+
+ covar_parms <- if (transformed) tparms[covar_pnames]
+ else bparms[covar_pnames]
+
+ if (transformed) {
+ covar <- try(solve(object$hessian), silent = TRUE)
+ } else {
+ covar <- try(solve(object$hessian_notrans), silent = TRUE)
+ }
+
+ # If inverting the hessian to obtain the covariance matrix failed or produced NA values
+ if (!is.numeric(covar) || is.na(covar[1])) {
+ ses <- lci <- uci <- rep(NA, p)
+ } else {
+ ses <- sqrt(diag(covar))[covar_pnames]
+ lci <- covar_parms + quantiles[1] * ses
+ uci <- covar_parms + quantiles[2] * ses
+ if (backtransform) {
+ lci_back <- backtransform_odeparms(lci,
+ object$mkinmod, object$transform_rates, object$transform_fractions)
+ lci <- c(lci_back, lci[names(object$errparms)])
+ uci_back <- backtransform_odeparms(uci,
+ object$mkinmod, object$transform_rates, object$transform_fractions)
+ uci <- c(uci_back, uci[names(object$errparms)])
+ }
+ }
+ }
+
+ if (method == "profile") {
+ if (!quiet) message("Profiling the likelihood")
+ lci <- uci <- rep(NA, p)
+ names(lci) <- names(uci) <- return_pnames
+
+ profile_pnames <- if (missing(parm)) names(parms(object))
+ else parm
+
+ # We do two-sided intervals based on the likelihood ratio
+ cutoff <- 0.5 * qchisq(1 - (alpha / 2), 1)
+
+ all_parms <- parms(object)
+
+ for (pname in profile_pnames)
+ {
+ pnames_free <- setdiff(names(all_parms), pname)
+ profile_ll <- function(x)
+ {
+ pll_cost <- function(P) {
+ parms_cost <- all_parms
+ parms_cost[pnames_free] <- P[pnames_free]
+ parms_cost[pname] <- x
+ - object$ll(parms_cost)
+ }
+ - nlminb(all_parms[pnames_free], pll_cost)$objective
+ }
+
+ cost <- function(x) {
+ (cutoff - (object$logLik - profile_ll(x)))^2
+ }
+
+ lci[pname] <- optimize(cost, lower = 0, upper = all_parms[pname])$minimum
+ uci[pname] <- optimize(cost, lower = all_parms[pname], upper = 15 * all_parms[pname])$minimum
+ }
+ }
+
+ ci <- cbind(lower = lci, upper = uci)
+ colnames(ci) <- paste0(
+ format(100 * a, trim = TRUE, scientific = FALSE, digits = 3), "%")
+
+ return(ci)
+}
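The 'profile' method above finds, for each parameter, the two values at which the profile log-likelihood has dropped from its maximum by the chi-squared based cutoff, reoptimising all remaining parameters at each candidate value. A standalone sketch of that cutoff-and-optimize logic, applied to the mean of normal data with known standard deviation (illustrative code, not part of mkin):

set.seed(1)
x <- rnorm(20, mean = 5, sd = 2)
# Log-likelihood as a function of the mean only, sigma assumed known
ll <- function(mu) sum(dnorm(x, mean = mu, sd = 2, log = TRUE))
mu_hat <- mean(x)                         # maximum likelihood estimate
alpha <- 0.05
cutoff <- 0.5 * qchisq(1 - alpha / 2, 1)  # same cutoff as in confint.mkinfit
# Squared distance between the observed log-likelihood drop and the cutoff
cost <- function(mu) (cutoff - (ll(mu_hat) - ll(mu)))^2
lower <- optimize(cost, lower = mu_hat - 5, upper = mu_hat)$minimum
upper <- optimize(cost, lower = mu_hat, upper = mu_hat + 5)$minimum
c(lower, upper)

Here no reoptimisation is needed because mu is the only parameter; in confint.mkinfit, the inner nlminb call plays that role for the remaining free parameters.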
diff --git a/R/mkinfit.R b/R/mkinfit.R
index 17fd59d0..a3e39053 100644
--- a/R/mkinfit.R
+++ b/R/mkinfit.R
@@ -1,7 +1,7 @@
if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' Fit a kinetic model to data with one or more state variables
-#'
+#'
#' This function maximises the likelihood of the observed data using the Port
#' algorithm \code{\link{nlminb}}, and the specified initial or fixed
#' parameters and starting values. In each step of the optimisation, the
@@ -9,11 +9,11 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' parameters of the selected error model are fitted simultaneously with the
#' degradation model parameters, as both of them are arguments of the
#' likelihood function.
-#'
+#'
#' By default, parameters in the kinetic models are internally transformed in
#' order to better satisfy the assumption of a normal distribution of their
#' estimators.
-#'
+#'
#' @param mkinmod A list of class \code{\link{mkinmod}}, containing the kinetic
#' model to be fitted to the data, or one of the shorthand names ("SFO",
#' "FOMC", "DFOP", "HS", "SFORB", "IORE"). If a shorthand name is given, a
@@ -33,7 +33,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' as indicated by \code{fixed_parms}. If set to "auto", initial values for
#' rate constants are set to default values. Using parameter names that are
#' not in the model gives an error.
-#'
+#'
#' It is possible to only specify a subset of the parameters that the model
#' needs. You can use the parameter lists "bparms.ode" from a previously
#' fitted model, which contains the differential equation parameters from
@@ -105,10 +105,10 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' argument. The default value is 100.
#' @param error_model If the error model is "const", a constant standard
#' deviation is assumed.
-#'
+#'
#' If the error model is "obs", each observed variable is assumed to have its
#' own variance.
-#'
+#'
#' If the error model is "tc" (two-component error model), a two component
#' error model similar to the one described by Rocke and Lorenzato (1995) is
#' used for setting up the likelihood function. Note that this model
@@ -119,27 +119,27 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' the error model. If the error model is "const", unweighted nonlinear
#' least squares fitting ("OLS") is selected. If the error model is "obs", or
#' "tc", the "d_3" algorithm is selected.
-#'
+#'
#' The algorithm "d_3" will directly minimize the negative log-likelihood and
#' - independently - also use the three step algorithm described below. The
#' fit with the higher likelihood is returned.
-#'
+#'
#' The algorithm "direct" will directly minimize the negative log-likelihood.
-#'
+#'
#' The algorithm "twostep" will minimize the negative log-likelihood after an
#' initial unweighted least squares optimisation step.
-#'
+#'
#' The algorithm "threestep" starts with unweighted least squares, then
#' optimizes only the error model using the degradation model parameters
#' found, and then minimizes the negative log-likelihood with free
#' degradation and error model parameters.
-#'
+#'
#' The algorithm "fourstep" starts with unweighted least squares, then
#' optimizes only the error model using the degradation model parameters
#' found, then optimizes the degradation model again with fixed error model
#' parameters, and finally minimizes the negative log-likelihood with free
#' degradation and error model parameters.
-#'
+#'
#' The algorithm "IRLS" (Iteratively Reweighted Least Squares) starts with
#' unweighted least squares, and then iterates optimization of the error
#' model parameters and subsequent optimization of the degradation model
@@ -161,20 +161,20 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' @author Johannes Ranke
#' @seealso Plotting methods \code{\link{plot.mkinfit}} and
#' \code{\link{mkinparplot}}.
-#'
+#'
#' Comparisons of models fitted to the same data can be made using
#' \code{\link{AIC}} by virtue of the method \code{\link{logLik.mkinfit}}.
-#'
+#'
#' Fitting of several models to several datasets in a single call to
#' \code{\link{mmkin}}.
#' @source Rocke, David M. and Lorenzato, Stefan (1995) A two-component model
#' for measurement error in analytical chemistry. Technometrics 37(2), 176-184.
#' @examples
-#'
+#'
#' # Use shorthand notation for parent only degradation
#' fit <- mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE)
#' summary(fit)
-#'
+#'
#' # One parent compound, one metabolite, both single first order.
#' # Use mkinsub for convenience in model formulation. Pathway to sink included by default.
#' SFO_SFO <- mkinmod(
@@ -192,7 +192,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' coef(fit.deSolve)
#' endpoints(fit.deSolve)
#' }
-#'
+#'
#' # Use stepwise fitting, using optimised parameters from parent only fit, FOMC
#' \dontrun{
#' FOMC_SFO <- mkinmod(
@@ -204,7 +204,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' fit.FOMC = mkinfit("FOMC", FOCUS_2006_D, quiet = TRUE)
#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, quiet = TRUE,
#' parms.ini = fit.FOMC$bparms.ode)
-#'
+#'
#' # Use stepwise fitting, using optimised parameters from parent only fit, SFORB
#' SFORB_SFO <- mkinmod(
#' parent = list(type = "SFORB", to = "m1", sink = TRUE),
@@ -217,7 +217,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' fit.SFORB = mkinfit("SFORB", FOCUS_2006_D, quiet = TRUE)
#' fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, parms.ini = fit.SFORB$bparms.ode, quiet = TRUE)
#' }
-#'
+#'
#' \dontrun{
#' # Weighted fits, including IRLS
#' SFO_SFO.ff <- mkinmod(parent = mkinsub("SFO", "m1"),
@@ -229,8 +229,8 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' f.tc <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, error_model = "tc", quiet = TRUE)
#' summary(f.tc)
#' }
-#'
-#'
+#'
+#'
#' @export
mkinfit <- function(mkinmod, observed,
parms.ini = "auto",
@@ -795,6 +795,8 @@ mkinfit <- function(mkinmod, observed,
fit$hessian <- try(numDeriv::hessian(cost_function, c(degparms, errparms), OLS = FALSE,
update_data = FALSE), silent = TRUE)
+ dimnames(fit$hessian) <- list(names(c(degparms, errparms)),
+ names(c(degparms, errparms)))
# Backtransform parameters
bparms.optim = backtransform_odeparms(fit$par, mkinmod,
@@ -805,6 +807,9 @@ mkinfit <- function(mkinmod, observed,
fit$hessian_notrans <- try(numDeriv::hessian(cost_function, c(bparms.all, errparms),
OLS = FALSE, trans = FALSE, update_data = FALSE), silent = TRUE)
+
+ dimnames(fit$hessian_notrans) <- list(names(c(bparms.all, errparms)),
+ names(c(bparms.all, errparms)))
})
fit$error_model_algorithm <- error_model_algorithm
@@ -839,7 +844,7 @@ mkinfit <- function(mkinmod, observed,
# Log-likelihood with possibility to fix degparms or errparms
fit$ll <- function(P, fixed_degparms = FALSE, fixed_errparms = FALSE) {
- - cost_function(P, fixed_degparms = fixed_degparms,
+ - cost_function(P, trans = FALSE, fixed_degparms = fixed_degparms,
fixed_errparms = fixed_errparms, OLS = FALSE, update_data = FALSE)
}
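A side effect of passing trans = FALSE in fit$ll above is that the stored log-likelihood function now expects parameters on the backtransformed scale, matching the default output of the new parms method. A quick sketch of the invariant the profile method in confint.mkinfit relies on (FOCUS_2006_C ships with mkin; the equality is stated here as an expectation, not tested output):

library(mkin)
f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
# Evaluating the stored log-likelihood at the fitted parameters
# should reproduce the maximised log-likelihood
f$ll(parms(f))
logLik(f)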
diff --git a/R/parms.mkinfit.R b/R/parms.mkinfit.R
new file mode 100644
index 00000000..250d9d1f
--- /dev/null
+++ b/R/parms.mkinfit.R
@@ -0,0 +1,24 @@
+#' Extract model parameters from mkinfit models
+#'
+#' This function always returns degradation model parameters as well as error
+#' model parameters, in order to avoid working with a fitted model without
+#' considering the error structure that was assumed for the fit.
+#'
+#' @param object A fitted model object
+#' @param complete Unused argument for compatibility with the generic coef function from base R
+#' @return A numeric vector of fitted model parameters
+#' @export
+parms <- function(object, ...)
+{
+ UseMethod("parms", object)
+}
+
+#' @param transformed Should the parameters be returned
+#' as used internally during the optimisation?
+#' @rdname parms
+#' @export
+parms.mkinfit <- function(object, transformed = FALSE, ...)
+{
+ if (transformed) object$par
+ else c(object$bparms.optim, object$errparms)
+}
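A short usage sketch for the new method, with the same parent-only fit as in the confint examples:

library(mkin)
f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
parms(f)                      # degradation and error model parameters, backtransformed
parms(f, transformed = TRUE)  # parameters as used internally during optimisation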
diff --git a/man/confint.mkinfit.Rd b/man/confint.mkinfit.Rd
new file mode 100644
index 00000000..94f55d14
--- /dev/null
+++ b/man/confint.mkinfit.Rd
@@ -0,0 +1,56 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/confint.mkinfit.R
+\name{confint.mkinfit}
+\alias{confint.mkinfit}
+\title{Confidence intervals for parameters of mkinfit objects}
+\usage{
+\method{confint}{mkinfit}(object, parm, level = 0.95, alpha = 1 -
+ level, method = c("profile", "quadratic"), transformed = TRUE,
+ backtransform = TRUE, distribution = c("student_t", "normal"),
+ quiet = FALSE, ...)
+}
+\arguments{
+\item{object}{An \code{\link{mkinfit}} object}
+
+\item{parm}{A vector of names of the parameters which are to be given
+confidence intervals. If missing, all parameters are considered.}
+
+\item{level}{The confidence level required}
+
+\item{alpha}{The allowed error probability, overrides 'level' if specified.}
+
+\item{method}{The 'profile' method searches the parameter space for the
+cutoff of the confidence intervals by means of a likelihood ratio test.
+The 'quadratic' method approximates the likelihood function at the
+optimised parameters using the second term of the Taylor expansion, using
+a second derivative (hessian) contained in the object.}
+
+\item{transformed}{If the quadratic approximation is used, should it be
+applied to the likelihood based on the transformed parameters?}
+
+\item{backtransform}{If we approximate the likelihood in terms of the
+transformed parameters, should we backtransform the parameters with
+their confidence intervals?}
+
+\item{distribution}{For the quadratic approximation, should we use
+the Student t distribution or assume a normal distribution for
+the parameter estimate?}
+
+\item{quiet}{Should we suppress messages?}
+}
+\value{
+A matrix with columns giving lower and upper confidence limits for
+ each parameter.
+}
+\description{
+Confidence intervals for parameters of mkinfit objects
+}
+\examples{
+f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
+confint(f, method = "quadratic")
+confint(f, method = "profile")
+}
+\references{
+Pawitan Y (2013) In all likelihood - Statistical modelling and
+ inference using likelihood. Clarendon Press, Oxford.
+}
diff --git a/man/parms.Rd b/man/parms.Rd
new file mode 100644
index 00000000..6de52557
--- /dev/null
+++ b/man/parms.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parms.mkinfit.R
+\name{parms}
+\alias{parms}
+\alias{parms.mkinfit}
+\title{Extract model parameters from mkinfit models}
+\usage{
+parms(object, ...)
+
+\method{parms}{mkinfit}(object, transformed = FALSE, ...)
+}
+\arguments{
+\item{object}{A fitted model object}
+
+\item{transformed}{Should the parameters be returned
+as used internally during the optimisation?}
+
+\item{complete}{Unused argument for compatibility with the generic coef function from base R}
+}
+\value{
+A numeric vector of fitted model parameters
+}
+\description{
+This function always returns degradation model parameters as well as error
+model parameters, in order to avoid working with a fitted model without
+considering the error structure that was assumed for the fit.
+}
diff --git a/tests/testthat/setup_script.R b/tests/testthat/setup_script.R
index 51fea4f6..c3fb4f06 100644
--- a/tests/testthat/setup_script.R
+++ b/tests/testthat/setup_script.R
@@ -1,21 +1,3 @@
-# Copyright (C) 2016-2019 Johannes Ranke
-# Contact: jranke@uni-bremen.de
-
-# This file is part of the R package mkin
-
-# mkin is free software: you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the Free Software
-# Foundation, either version 3 of the License, or (at your option) any later
-# version.
-
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-# details.
-
-# You should have received a copy of the GNU General Public License along with
-# this program. If not, see <http://www.gnu.org/licenses/>
-
require(mkin)
require(testthat)
@@ -76,5 +58,16 @@ f_SFO_lin_mkin_OLS <- mkinfit(m_synth_SFO_lin, SFO_lin_a, quiet = TRUE)
f_SFO_lin_mkin_ML <- mkinfit(m_synth_SFO_lin, SFO_lin_a, quiet = TRUE,
error_model = "const", error_model_algorithm = "direct")
-fit_obs_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "obs", quiet = TRUE)
-fit_tc_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "tc", quiet = TRUE)
+# We know direct optimization is OK and direct needs 4 sec versus 5.5 for threestep and 6 for IRLS
+fit_obs_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "obs", quiet = TRUE,
+ error_model_algorithm = "direct")
+# We know threestep is OK, and threestep (and IRLS) need 4.8 sec versus 5.6 sec for direct
+fit_tc_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "tc", quiet = TRUE,
+ error_model_algorithm = "threestep")
+
+# We know direct optimization is OK and direct needs 8 sec versus 11 sec for threestep
+f_tc_2 <- mkinfit(m_synth_DFOP_par, DFOP_par_c, error_model = "tc",
+ error_model_algorithm = "direct", quiet = TRUE)
+
+f_tc_2_ntf <- mkinfit(m_synth_DFOP_par, DFOP_par_c, error_model = "tc",
+ transform_fractions = FALSE, error_model_algorithm = "direct", quiet = TRUE)
diff --git a/tests/testthat/test_confidence.R b/tests/testthat/test_confidence.R
index 0423302b..68ff4a98 100644
--- a/tests/testthat/test_confidence.R
+++ b/tests/testthat/test_confidence.R
@@ -1,40 +1,123 @@
-# Copyright (C) 2019 Johannes Ranke
-# Contact: jranke@uni-bremen.de
+context("Confidence intervals and p-values")
-# This file is part of the R package mkin
+test_that("The confint method 'quadratic' is consistent with the summary", {
+ expect_equivalent(
+ confint(f_SFO_lin_mkin_ML, method = "quadratic"),
+ summary(f_SFO_lin_mkin_ML)$bpar[, c("Lower", "Upper")])
-# mkin is free software: you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the Free Software
-# Foundation, either version 3 of the License, or (at your option) any later
-# version.
+ expect_equivalent(
+ confint(f_SFO_lin_mkin_ML, method = "quadratic", backtransform = FALSE),
+ summary(f_SFO_lin_mkin_ML)$par[, c("Lower", "Upper")])
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-# details.
+ f_notrans <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE, transform_rates = FALSE)
+ expect_equivalent(
+ confint(f_notrans, method = "quadratic", transformed = FALSE),
+ summary(f_notrans)$par[, c("Lower", "Upper")])
-# You should have received a copy of the GNU General Public License along with
-# this program. If not, see <http://www.gnu.org/licenses/>
+ expect_equivalent(
+ confint(f_notrans, method = "quadratic", transformed = FALSE),
+ summary(f_notrans)$bpar[, c("Lower", "Upper")])
-context("Confidence intervals and p-values")
+})
+
+test_that("Quadratic confidence intervals for rate constants are comparable to confint.nls", {
+
+ SFO_trans <- function(t, parent_0, log_k_parent_sink) {
+ parent_0 * exp(- exp(log_k_parent_sink) * t)
+ }
+ SFO_notrans <- function(t, parent_0, k_parent_sink) {
+ parent_0 * exp(- k_parent_sink * t)
+ }
+ f_1_nls_trans <- nls(value ~ SFO_trans(time, parent_0, log_k_parent_sink),
+ data = FOCUS_2006_A,
+ start = list(parent_0 = 100, log_k_parent_sink = log(0.1)))
+ f_1_nls_notrans <- nls(value ~ SFO_notrans(time, parent_0, k_parent_sink),
+ data = FOCUS_2006_A,
+ start = list(parent_0 = 100, k_parent_sink = 0.1))
+
+ f_1_mkin_OLS <- mkinfit("SFO", FOCUS_2006_A, quiet = TRUE)
+ f_1_mkin_OLS_notrans <- mkinfit("SFO", FOCUS_2006_A, quiet = TRUE,
+ transform_rates = FALSE)
+
+ # Check fitted parameter values
+ expect_equivalent(
+ (f_1_mkin_OLS$bparms.optim - coef(f_1_nls_notrans)) / f_1_mkin_OLS$bparms.optim,
+ rep(0, 2), tolerance = 1e-6)
+ expect_equivalent(
+ (f_1_mkin_OLS$par[1:2] - coef(f_1_nls_trans))/f_1_mkin_OLS$par[1:2],
+ rep(0, 2), tolerance = 1e-6)
+
+ # Check the standard error for the transformed parameters
+ se_nls <- summary(f_1_nls_trans)$coefficients[, "Std. Error"]
+ # This is of similar magnitude to the standard error obtained with mkin
+ se_mkin <- summary(f_1_mkin_OLS)$par[1:2, "Std. Error"]
+
+ se_nls_notrans <- summary(f_1_nls_notrans)$coefficients[, "Std. Error"]
+ # This is also of similar magnitude to the standard error obtained with mkin
+ se_mkin_notrans <- summary(f_1_mkin_OLS_notrans)$par[1:2, "Std. Error"]
+
+ # The difference can partly be explained by the ratio between
+ # the maximum likelihood estimate of the standard error sqrt(rss/n)
+ # and the estimate used in nls sqrt(rss/rdf), i.e. by the factor
+ # sqrt(n/rdf).
+ # In the case of mkin, the residual degrees of freedom are only taken into
+ # account in the subsequent step of generating the confidence intervals for
+ # the parameters (including sigma)
+
+ # Strangely, this explains the ratio to better than 1% for the rate
+ # constant, but not for the initial estimate
+ expect_equivalent(se_nls[2] / se_mkin[2], sqrt(8/5), tolerance = 0.01)
+ expect_equivalent(se_nls_notrans[2] / se_mkin_notrans[2], sqrt(8/5), tolerance = 0.01)
+
+ # Another case:
+ f_2_mkin <- mkinfit("DFOP", FOCUS_2006_C, quiet = TRUE)
+ f_2_nls <- nls(value ~ SSbiexp(time, A1, lrc1, A2, lrc2), data = FOCUS_2006_C)
+ se_mkin_2 <- summary(f_2_mkin)$par[1:4, "Std. Error"]
+ se_nls_2 <- summary(f_2_nls)$coefficients[, "Std. Error"]
+ # Here the ratio of standard errors can be explained by the same
+ # principle up to about 3%
+ expect_equivalent(
+ se_nls_2[c("lrc1", "lrc2")] / se_mkin_2[c("log_k1", "log_k2")],
+ rep(sqrt(nrow(FOCUS_2006_C) / (nrow(FOCUS_2006_C) - 4)), 2),
+ tolerance = 0.03)
+})
+
+test_that("Likelihood profile based confidence intervals work", {
-test_that("Confidence intervals are stable", {
- bpar_1 <- summary(f_SFO_lin_mkin_ML)$bpar[, c("Estimate", "Lower", "Upper")]
- # The reference used here is mkin 0.9.48.1
- bpar_1_mkin_0.9 <- read.table(text =
-"parent_0 102.0000 98.6000 106.0000
-k_parent 0.7390 0.6770 0.8070
-k_M1 0.2990 0.2560 0.3490
-k_M2 0.0202 0.0176 0.0233
-f_parent_to_M1 0.7690 0.6640 0.8480
-f_M1_to_M2 0.7230 0.6030 0.8180",
-col.names = c("parameter", "estimate", "lower", "upper"))
-
- expect_equivalent(signif(bpar_1[1:6, "Estimate"], 3), bpar_1_mkin_0.9$estimate)
-
- # Relative difference of lower bound of the confidence interval is < 0.02
- expect_equivalent(bpar_1[1:6, "Lower"], bpar_1_mkin_0.9$lower,
- scale = bpar_1_mkin_0.9$lower, tolerance = 0.02)
- expect_equivalent(f_SFO_lin_mkin_OLS$bpar, f_SFO_lin_mkin_ML$bpar)
- })
+# f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
+# f <- fits[["SFO", "FOCUS_C"]]
+# f_notrans <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE, transform_rates = FALSE)
+# pf <- parms(f)
+# f_nll <- function(parent_0, k_parent_sink, sigma) {
+# - f$ll(c(parent_0 = as.numeric(parent_0),
+# k_parent_sink = as.numeric(k_parent_sink),
+# sigma = as.numeric(sigma)))
+# }
+# f_nll(parent_0 = 100, k_parent_sink = 0.3, sigma = 4.7)
+# f_nll(pf[1], pf[2], pf[3])
+# f_mle <- stats4::mle(f_nll, start = as.list(parms(f)), nobs = nrow(FOCUS_2006_C))
+# f_mle <- stats4::mle(f_nll, start = as.list(parms(f)), nobs = 17L)
+# logLik(f_mle)
+# stats4::confint(f_mle, nobs = nrow(FOCUS_2006_C))
+#
+# ci_mkin_1 <- confint(f,
+# method = "quadratic", backtransform = FALSE,
+# transformed = TRUE)
+# ci_maxLik_1 <- maxLik::confint.maxLik(f_maxLik)
+#
+# f_tc_2_maxLik <- maxLik::maxLik(f_tc_2$ll,
+# start = f_tc_2$par)
+# ci_tc_2 <- confint(f_tc_2, method = "quadratic",
+# backtransform = FALSE, transformed = TRUE,
+# distribution = "normal")
+# ci_tc_2_maxLik <- confint(f_tc_2_maxLik)
+# rel_diff_ci_tc_2 <- (ci_tc_2 - ci_tc_2_maxLik)/ci_tc_2_maxLik
+# # The ilr transformed variable appear to misbehave in the numeric differentiation
+# expect_equivalent(
+# rel_diff_ci_tc_2[c(2, 3, 4, 6, 7, 9, 10), ], rep(0, 14),
+# tolerance = 0.05)
+#
+# ci_tc_2[, 1]
+# ci_tc_2_maxLik[, 1]
+})
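The sqrt(n/rdf) factor invoked in the comments of the nls comparison above can be checked in isolation: the maximum likelihood estimate of sigma divides the residual sum of squares by the number of observations, while nls divides by the residual degrees of freedom. A sketch with made-up numbers, chosen to reproduce the sqrt(8/5) ratio used in the test:

n <- 8      # number of observations (illustrative)
rdf <- 5    # residual degrees of freedom (illustrative)
rss <- 100  # residual sum of squares (illustrative)
sigma_ml <- sqrt(rss / n)    # maximum likelihood estimate, as used by mkin
sigma_df <- sqrt(rss / rdf)  # degrees-of-freedom corrected estimate, as in nls
sigma_df / sigma_ml          # the factor sqrt(n / rdf), here about 1.265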