From 7b7729694363515007193d1c3e29e9b76271abb3 Mon Sep 17 00:00:00 2001
From: Johannes Ranke
Date: Sat, 26 Oct 2019 20:50:09 +0200
Subject: parms and confint methods

The confint method can do profile likelihood based confidence intervals!
---
 R/confint.mkinfit.R | 139 ++++++++++++++++++++++++++++++++++++++++++++++++
 R/mkinfit.R         |  49 +++++++++---------
 R/parms.mkinfit.R   |  24 +++++++++
 3 files changed, 190 insertions(+), 22 deletions(-)
 create mode 100644 R/confint.mkinfit.R
 create mode 100644 R/parms.mkinfit.R
(limited to 'R')

diff --git a/R/confint.mkinfit.R b/R/confint.mkinfit.R
new file mode 100644
index 00000000..887adc9f
--- /dev/null
+++ b/R/confint.mkinfit.R
@@ -0,0 +1,139 @@
+#' Confidence intervals for parameters of mkinfit objects
+#'
+#' @param object An \code{\link{mkinfit}} object
+#' @param parm A vector of names of the parameters which are to be given
+#'   confidence intervals. If missing, all parameters are considered.
+#' @param level The confidence level required
+#' @param alpha The allowed error probability, overrides 'level' if specified.
+#' @param method The 'profile' method searches the parameter space for the
+#'   cutoff of the confidence intervals by means of a likelihood ratio test.
+#'   The 'quadratic' method approximates the likelihood function at the
+#'   optimised parameters by a second order Taylor expansion, based on the
+#'   second derivative (hessian) contained in the object.
+#' @param transformed If the quadratic approximation is used, should it be
+#'   applied to the likelihood based on the transformed parameters?
+#' @param backtransform If we approximate the likelihood in terms of the
+#'   transformed parameters, should we backtransform the parameters with
+#'   their confidence intervals?
+#' @param distribution For the quadratic approximation, should we use the
+#'   Student t distribution or assume a normal distribution for the
+#'   parameter estimates?
+#' @param quiet Should we suppress messages?
+#' @return A matrix with columns giving lower and upper confidence limits for
+#'   each parameter.
+#' @references Pawitan Y (2013) In all likelihood - Statistical modelling and
+#'   inference using likelihood. Clarendon Press, Oxford.
+#' @examples
+#' f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
+#' confint(f, method = "quadratic")
+#' confint(f, method = "profile")
+#' @export
+confint.mkinfit <- function(object, parm,
+  level = 0.95, alpha = 1 - level,
+  method = c("profile", "quadratic"),
+  transformed = TRUE, backtransform = TRUE,
+  distribution = c("student_t", "normal"), quiet = FALSE, ...)
+{
+  tparms <- parms(object, transformed = TRUE)
+  bparms <- parms(object, transformed = FALSE)
+  tpnames <- names(tparms)
+  bpnames <- names(bparms)
+
+  return_pnames <- if (missing(parm)) {
+    if (backtransform) bpnames else tpnames
+  } else {
+    parm
+  }
+
+  p <- length(return_pnames)
+
+  method <- match.arg(method)
+
+  a <- c(alpha / 2, 1 - (alpha / 2))
+
+  if (method == "quadratic") {
+
+    distribution <- match.arg(distribution)
+
+    quantiles <- switch(distribution,
+      student_t = qt(a, object$df.residual),
+      normal = qnorm(a))
+
+    covar_pnames <- if (missing(parm)) {
+      if (transformed) tpnames else bpnames
+    } else {
+      parm
+    }
+
+    return_parms <- if (backtransform) bparms[return_pnames]
+      else tparms[return_pnames]
+
+    covar_parms <- if (transformed) tparms[covar_pnames]
+      else bparms[covar_pnames]
+
+    if (transformed) {
+      covar <- try(solve(object$hessian), silent = TRUE)
+    } else {
+      covar <- try(solve(object$hessian_notrans), silent = TRUE)
+    }
+
+    # If inverting the covariance matrix failed or produced NA values
+    if (!is.numeric(covar) | is.na(covar[1])) {
+      ses <- lci <- uci <- rep(NA, p)
+    } else {
+      ses <- sqrt(diag(covar))[covar_pnames]
+      lci <- covar_parms + quantiles[1] * ses
+      uci <- covar_parms + quantiles[2] * ses
+      if (backtransform) {
+        lci_back <- backtransform_odeparms(lci,
+          object$mkinmod, object$transform_rates, object$transform_fractions)
+        lci <- c(lci_back, lci[names(object$errparms)])
+        uci_back <- backtransform_odeparms(uci,
+          object$mkinmod, object$transform_rates, object$transform_fractions)
+        uci <- c(uci_back, uci[names(object$errparms)])
+      }
+    }
+  }
+
+  if (method == "profile") {
+    message("Profiling the likelihood")
+    lci <- uci <- rep(NA, p)
+    names(lci) <- names(uci) <- return_pnames
+
+    profile_pnames <- if (missing(parm)) names(parms(object))
+      else parm
+
+    # We do two-sided intervals based on the likelihood ratio
+    cutoff <- 0.5 * qchisq(1 - (alpha / 2), 1)
+
+    all_parms <- parms(object)
+
+    for (pname in profile_pnames)
+    {
+      pnames_free <- setdiff(names(all_parms), pname)
+      profile_ll <- function(x)
+      {
+        pll_cost <- function(P) {
+          parms_cost <- all_parms
+          parms_cost[pnames_free] <- P[pnames_free]
+          parms_cost[pname] <- x
+          - object$ll(parms_cost)
+        }
+        - nlminb(all_parms[pnames_free], pll_cost)$objective
+      }
+
+      cost <- function(x) {
+        (cutoff - (object$logLik - profile_ll(x)))^2
+      }
+
+      lci[pname] <- optimize(cost, lower = 0, upper = all_parms[pname])$minimum
+      uci[pname] <- optimize(cost, lower = all_parms[pname], upper = 15 * all_parms[pname])$minimum
+    }
+  }
+
+  ci <- cbind(lower = lci, upper = uci)
+  colnames(ci) <- paste0(
+    format(100 * a, trim = TRUE, scientific = FALSE, digits = 3), "%")
+
+  return(ci)
+}
diff --git a/R/mkinfit.R b/R/mkinfit.R
index 17fd59d0..a3e39053 100644
--- a/R/mkinfit.R
+++ b/R/mkinfit.R
@@ -1,7 +1,7 @@
 if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 
 #' Fit a kinetic model to data with one or more state variables
-#' 
+#'
 #' This function maximises the likelihood of the observed data using the Port
 #' algorithm \code{\link{nlminb}}, and the specified initial or fixed
 #' parameters and starting values. In each step of the optimisation, the
@@ -9,11 +9,11 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' parameters of the selected error model are fitted simultaneously with the
 #' degradation model parameters, as both of them are arguments of the
 #' likelihood function.
-#' 
+#'
 #' By default, parameters in the kinetic models are internally transformed in
 #' order to better satisfy the assumption of a normal distribution of their
 #' estimators.
-#' 
+#'
 #' @param mkinmod A list of class \code{\link{mkinmod}}, containing the kinetic
 #' model to be fitted to the data, or one of the shorthand names ("SFO",
 #' "FOMC", "DFOP", "HS", "SFORB", "IORE"). If a shorthand name is given, a
@@ -33,7 +33,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' as indicated by \code{fixed_parms}. If set to "auto", initial values for
 #' rate constants are set to default values. Using parameter names that are
 #' not in the model gives an error.
-#' 
+#'
 #' It is possible to only specify a subset of the parameters that the model
 #' needs. You can use the parameter lists "bparms.ode" from a previously
 #' fitted model, which contains the differential equation parameters from
@@ -105,10 +105,10 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' argument. The default value is 100.
 #' @param error_model If the error model is "const", a constant standard
 #' deviation is assumed.
-#' 
+#'
 #' If the error model is "obs", each observed variable is assumed to have its
 #' own variance.
-#' 
+#'
 #' If the error model is "tc" (two-component error model), a two-component
 #' error model similar to the one described by Rocke and Lorenzato (1995) is
 #' used for setting up the likelihood function. Note that this model
@@ -119,27 +119,27 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' the error model. If the error model is "const", unweighted nonlinear
 #' least squares fitting ("OLS") is selected. If the error model is "obs", or
 #' "tc", the "d_3" algorithm is selected.
-#' 
+#'
 #' The algorithm "d_3" will directly minimize the negative log-likelihood and
 #' - independently - also use the three step algorithm described below. The
 #' fit with the higher likelihood is returned.
-#' 
+#'
 #' The algorithm "direct" will directly minimize the negative log-likelihood.
-#' 
+#'
 #' The algorithm "twostep" will minimize the negative log-likelihood after an
 #' initial unweighted least squares optimisation step.
-#' 
+#'
 #' The algorithm "threestep" starts with unweighted least squares, then
 #' optimizes only the error model using the degradation model parameters
 #' found, and then minimizes the negative log-likelihood with free
 #' degradation and error model parameters.
-#' 
+#'
 #' The algorithm "fourstep" starts with unweighted least squares, then
 #' optimizes only the error model using the degradation model parameters
 #' found, then optimizes the degradation model again with fixed error model
 #' parameters, and finally minimizes the negative log-likelihood with free
 #' degradation and error model parameters.
-#' 
+#'
 #' The algorithm "IRLS" (Iteratively Reweighted Least Squares) starts with
 #' unweighted least squares, and then iterates optimization of the error
 #' model parameters and subsequent optimization of the degradation model
@@ -161,20 +161,20 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' @author Johannes Ranke
 #' @seealso Plotting methods \code{\link{plot.mkinfit}} and
 #' \code{\link{mkinparplot}}.
-#' 
+#'
 #' Comparisons of models fitted to the same data can be made using
 #' \code{\link{AIC}} by virtue of the method \code{\link{logLik.mkinfit}}.
-#' 
+#'
 #' Fitting of several models to several datasets in a single call to
 #' \code{\link{mmkin}}.
 #' @source Rocke, David M. and Lorenzato, Stefan (1995) A two-component model
 #' for measurement error in analytical chemistry. Technometrics 37(2), 176-184.
 #' @examples
-#' 
+#'
 #' # Use shorthand notation for parent only degradation
 #' fit <- mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE)
 #' summary(fit)
-#' 
+#'
 #' # One parent compound, one metabolite, both single first order.
 #' # Use mkinsub for convenience in model formulation. Pathway to sink included by default.
 #' SFO_SFO <- mkinmod(
@@ -192,7 +192,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' coef(fit.deSolve)
 #' endpoints(fit.deSolve)
 #' }
-#' 
+#'
 #' # Use stepwise fitting, using optimised parameters from parent only fit, FOMC
 #' \dontrun{
 #' FOMC_SFO <- mkinmod(
@@ -204,7 +204,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' fit.FOMC = mkinfit("FOMC", FOCUS_2006_D, quiet = TRUE)
 #' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, quiet = TRUE,
 #'   parms.ini = fit.FOMC$bparms.ode)
-#' 
+#'
 #' # Use stepwise fitting, using optimised parameters from parent only fit, SFORB
 #' SFORB_SFO <- mkinmod(
 #'   parent = list(type = "SFORB", to = "m1", sink = TRUE),
@@ -217,7 +217,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' fit.SFORB = mkinfit("SFORB", FOCUS_2006_D, quiet = TRUE)
 #' fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, parms.ini = fit.SFORB$bparms.ode, quiet = TRUE)
 #' }
-#' 
+#'
 #' \dontrun{
 #' # Weighted fits, including IRLS
 #' SFO_SFO.ff <- mkinmod(parent = mkinsub("SFO", "m1"),
@@ -229,8 +229,8 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' f.tc <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, error_model = "tc", quiet = TRUE)
 #' summary(f.tc)
 #' }
-#' 
-#' 
+#'
+#'
 #' @export
 mkinfit <- function(mkinmod, observed,
   parms.ini = "auto",
@@ -795,6 +795,8 @@ mkinfit <- function(mkinmod, observed,
 
   fit$hessian <- try(numDeriv::hessian(cost_function, c(degparms, errparms), OLS = FALSE,
     update_data = FALSE), silent = TRUE)
+  dimnames(fit$hessian) <- list(names(c(degparms, errparms)),
+    names(c(degparms, errparms)))
 
   # Backtransform parameters
   bparms.optim = backtransform_odeparms(fit$par, mkinmod,
@@ -805,6 +807,9 @@ mkinfit <- function(mkinmod, observed,
 
     fit$hessian_notrans <- try(numDeriv::hessian(cost_function, c(bparms.all, errparms),
       OLS = FALSE, trans = FALSE, update_data = FALSE), silent = TRUE)
+
+    dimnames(fit$hessian_notrans) <- list(names(c(bparms.all, errparms)),
+      names(c(bparms.all, errparms)))
   })
 
   fit$error_model_algorithm <- error_model_algorithm
@@ -839,7 +844,7 @@ mkinfit <- function(mkinmod, observed,
 
   # Log-likelihood with possibility to fix degparms or errparms
   fit$ll <- function(P, fixed_degparms = FALSE, fixed_errparms = FALSE) {
-    - cost_function(P, fixed_degparms = fixed_degparms,
+    - cost_function(P, trans = FALSE, fixed_degparms = fixed_degparms,
       fixed_errparms = fixed_errparms, OLS = FALSE, update_data = FALSE)
   }
 
diff --git a/R/parms.mkinfit.R b/R/parms.mkinfit.R
new file mode 100644
index 00000000..250d9d1f
--- /dev/null
+++ b/R/parms.mkinfit.R
@@ -0,0 +1,24 @@
+#' Extract model parameters from mkinfit models
+#'
+#' This function always returns degradation model parameters as well as error
+#' model parameters, in order to avoid working with a fitted model without
+#' considering the error structure that was assumed for the fit.
+#'
+#' @param object A fitted model object
+#' @param complete Unused argument for compatibility with the generic coef function from base R
+#' @return A numeric vector of fitted model parameters
+#' @export
+parms <- function(object, ...)
+{
+  UseMethod("parms", object)
+}
+
+#' @param transformed Should the parameters be returned
+#'   as used internally during the optimisation?
+#' @rdname parms
+#' @export
+parms.mkinfit <- function(object, transformed = FALSE, ...)
+{
+  if (transformed) object$par
+  else c(object$bparms.optim, object$errparms)
+}
--
cgit v1.2.1
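
Taken together, the two new methods are meant to be used on the same fitted object. The short sketch below combines the examples from the new roxygen documentation; it assumes a version of mkin that already contains this commit, and uses the FOCUS_2006_C dataset shipped with the package.

library(mkin)

# Fit a parent-only SFO model to FOCUS dataset C (as in the @examples above)
f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)

# Degradation and error model parameters, backtransformed (the default)
# and as used internally during the optimisation
parms(f)
parms(f, transformed = TRUE)

# Confidence intervals from the quadratic approximation and from profiling
# the likelihood; the profile method re-optimises the remaining parameters
# for each candidate value, so it takes noticeably longer
confint(f, method = "quadratic")
confint(f, method = "profile")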
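For readers who want to see what the 'quadratic' branch does without going through the mkin internals, the following standalone sketch applies the same recipe (invert the hessian of the negative log-likelihood, take the square roots of its diagonal as standard errors, add t quantiles) to a small simulated first-order decay dataset. Everything in it, the data, the nll() function and the parameter names, is made up for the illustration and is not part of mkin.

set.seed(123456)

# Simulated parent-only first-order decay with constant-variance error
t_obs <- c(0, 1, 3, 7, 14, 28, 56, 90)
parent_0 <- 100
k_true <- 0.05
sigma_true <- 3
y_obs <- parent_0 * exp(-k_true * t_obs) + rnorm(length(t_obs), sd = sigma_true)

# Negative log-likelihood in the parameter vector c(parent_0, k, sigma)
nll <- function(p) {
  if (p[2] <= 0 || p[3] <= 0) return(1e10)  # keep rate and sd positive
  pred <- p[1] * exp(-p[2] * t_obs)
  -sum(dnorm(y_obs, mean = pred, sd = p[3], log = TRUE))
}

# Maximum likelihood fit, asking optim() for the hessian at the optimum
fit <- optim(c(parent_0 = 90, k = 0.1, sigma = 1), nll, hessian = TRUE)

# Quadratic (Wald-type) intervals: invert the hessian, take square roots of
# the diagonal as standard errors, and add t quantiles as in the 'quadratic'
# branch of the new confint method
level <- 0.95
alpha <- 1 - level
covar <- solve(fit$hessian)
ses <- sqrt(diag(covar))
df_resid <- length(y_obs) - length(fit$par)
ci_quad <- fit$par + outer(ses, qt(c(alpha / 2, 1 - alpha / 2), df_resid))
dimnames(ci_quad) <- list(names(fit$par), c("2.5%", "97.5%"))
ci_quad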
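The 'profile' branch instead re-optimises all remaining parameters for each trial value of the parameter of interest and looks for the two points where the profile log-likelihood has dropped from its maximum by the cutoff 0.5 * qchisq(1 - alpha / 2, 1), using optimize() on either side of the estimate. Continuing the sketch above for the rate constant k (again an illustration of the approach, not the mkin code):

# Profile likelihood interval for k, continuing from the objects defined
# in the previous sketch
ll_max <- -fit$value
cutoff <- 0.5 * qchisq(1 - alpha / 2, 1)

# Re-optimise the nuisance parameters parent_0 and sigma for a fixed k
profile_ll <- function(k) {
  pll <- nlminb(fit$par[c("parent_0", "sigma")],
    function(p) nll(c(p[1], k, p[2])))
  -pll$objective
}

# Squared distance between the likelihood drop and the cutoff, minimised
# on either side of the estimate to locate the interval endpoints
ci_cost <- function(k) (cutoff - (ll_max - profile_ll(k)))^2

k_hat <- unname(fit$par["k"])
c(lower = optimize(ci_cost, lower = 0, upper = k_hat)$minimum,
  upper = optimize(ci_cost, lower = k_hat, upper = 15 * k_hat)$minimum)

For well-behaved likelihoods the two approaches give similar intervals; when the log-likelihood is clearly asymmetric around the optimum, the profile intervals are generally considered the more trustworthy of the two.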