From 7b7729694363515007193d1c3e29e9b76271abb3 Mon Sep 17 00:00:00 2001
From: Johannes Ranke
Date: Sat, 26 Oct 2019 20:50:09 +0200
Subject: parms and confint methods

The confint method can do profile likelihood based confidence intervals!
---
 R/confint.mkinfit.R | 139 ++++++++++++++++++++++++++++++++++++++++++++++++
 R/mkinfit.R         |  49 +++++++++---------
 R/parms.mkinfit.R   |  24 +++++++++
 3 files changed, 190 insertions(+), 22 deletions(-)
 create mode 100644 R/confint.mkinfit.R
 create mode 100644 R/parms.mkinfit.R
(limited to 'R')

diff --git a/R/confint.mkinfit.R b/R/confint.mkinfit.R
new file mode 100644
index 00000000..887adc9f
--- /dev/null
+++ b/R/confint.mkinfit.R
@@ -0,0 +1,139 @@
+#' Confidence intervals for parameters of mkinfit objects
+#'
+#' @param object An \code{\link{mkinfit}} object
+#' @param parm A vector of names of the parameters which are to be given
+#'   confidence intervals. If missing, all parameters are considered.
+#' @param level The confidence level required
+#' @param alpha The allowed error probability, overrides 'level' if specified.
+#' @param method The 'profile' method searches the parameter space for the
+#'   cutoff of the confidence intervals by means of a likelihood ratio test.
+#'   The 'quadratic' method approximates the likelihood function at the
+#'   optimised parameters by a second order Taylor expansion, based on the
+#'   second derivative (hessian) contained in the object.
+#' @param transformed If the quadratic approximation is used, should it be
+#'   applied to the likelihood based on the transformed parameters?
+#' @param backtransform If we approximate the likelihood in terms of the
+#'   transformed parameters, should we backtransform the parameters with
+#'   their confidence intervals?
+#' @param distribution For the quadratic approximation, should we use the
+#'   Student t distribution or assume a normal distribution for the
+#'   parameter estimates?
+#' @param quiet Should we suppress messages?
+#' @return A matrix with columns giving lower and upper confidence limits for
+#'   each parameter.
+#' @references Pawitan Y (2013) In all likelihood - Statistical modelling and
+#'   inference using likelihood. Clarendon Press, Oxford.
+#' @examples
+#' f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
+#' confint(f, method = "quadratic")
+#' confint(f, method = "profile")
+#' @export
+confint.mkinfit <- function(object, parm,
+  level = 0.95, alpha = 1 - level,
+  method = c("profile", "quadratic"),
+  transformed = TRUE, backtransform = TRUE,
+  distribution = c("student_t", "normal"), quiet = FALSE, ...)
+{
+  tparms <- parms(object, transformed = TRUE)
+  bparms <- parms(object, transformed = FALSE)
+  tpnames <- names(tparms)
+  bpnames <- names(bparms)
+
+  return_pnames <- if (missing(parm)) {
+    if (backtransform) bpnames else tpnames
+  } else {
+    parm
+  }
+
+  p <- length(return_pnames)
+
+  method <- match.arg(method)
+
+  a <- c(alpha / 2, 1 - (alpha / 2))
+
+  if (method == "quadratic") {
+
+    distribution <- match.arg(distribution)
+
+    quantiles <- switch(distribution,
+      student_t = qt(a, object$df.residual),
+      normal = qnorm(a))
+
+    covar_pnames <- if (missing(parm)) {
+      if (transformed) tpnames else bpnames
+    } else {
+      parm
+    }
+
+    return_parms <- if (backtransform) bparms[return_pnames]
+      else tparms[return_pnames]
+
+    covar_parms <- if (transformed) tparms[covar_pnames]
+      else bparms[covar_pnames]
+
+    if (transformed) {
+      covar <- try(solve(object$hessian), silent = TRUE)
+    } else {
+      covar <- try(solve(object$hessian_notrans), silent = TRUE)
+    }
+
+    # If inverting the covariance matrix failed or produced NA values
+    if (!is.numeric(covar) | is.na(covar[1])) {
+      ses <- lci <- uci <- rep(NA, p)
+    } else {
+      ses <- sqrt(diag(covar))[covar_pnames]
+      lci <- covar_parms + quantiles[1] * ses
+      uci <- covar_parms + quantiles[2] * ses
+      if (backtransform) {
+        lci_back <- backtransform_odeparms(lci,
+          object$mkinmod, object$transform_rates, object$transform_fractions)
+        lci <- c(lci_back, lci[names(object$errparms)])
+        uci_back <- backtransform_odeparms(uci,
+          object$mkinmod, object$transform_rates, object$transform_fractions)
+        uci <- c(uci_back, uci[names(object$errparms)])
+      }
+    }
+  }
+
+  if (method == "profile") {
+    message("Profiling the likelihood")
+    lci <- uci <- rep(NA, p)
+    names(lci) <- names(uci) <- return_pnames
+
+    profile_pnames <- if (missing(parm)) names(parms(object))
+      else parm
+
+    # We do two-sided intervals based on the likelihood ratio
+    cutoff <- 0.5 * qchisq(1 - (alpha / 2), 1)
+
+    all_parms <- parms(object)
+
+    for (pname in profile_pnames)
+    {
+      pnames_free <- setdiff(names(all_parms), pname)
+      profile_ll <- function(x)
+      {
+        pll_cost <- function(P) {
+          parms_cost <- all_parms
+          parms_cost[pnames_free] <- P[pnames_free]
+          parms_cost[pname] <- x
+          - object$ll(parms_cost)
+        }
+        - nlminb(all_parms[pnames_free], pll_cost)$objective
+      }
+
+      cost <- function(x) {
+        (cutoff - (object$logLik - profile_ll(x)))^2
+      }
+
+      lci[pname] <- optimize(cost, lower = 0, upper = all_parms[pname])$minimum
+      uci[pname] <- optimize(cost, lower = all_parms[pname], upper = 15 * all_parms[pname])$minimum
+    }
+  }
+
+  ci <- cbind(lower = lci, upper = uci)
+  colnames(ci) <- paste0(
+    format(100 * a, trim = TRUE, scientific = FALSE, digits = 3), "%")
+
+  return(ci)
+}
diff --git a/R/mkinfit.R b/R/mkinfit.R
index 17fd59d0..a3e39053 100644
--- a/R/mkinfit.R
+++ b/R/mkinfit.R
@@ -1,7 +1,7 @@
 if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 
 #' Fit a kinetic model to data with one or more state variables
-#' 
+#'
 #' This function maximises the likelihood of the observed data using the Port
 #' algorithm \code{\link{nlminb}}, and the specified initial or fixed
 #' parameters and starting values. In each step of the optimisation, the
@@ -9,11 +9,11 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' parameters of the selected error model are fitted simultaneously with the
 #' degradation model parameters, as both of them are arguments of the
 #' likelihood function.
-#' 
+#'
 #' By default, parameters in the kinetic models are internally transformed in
 #' order to better satisfy the assumption of a normal distribution of their
 #' estimators.
-#' 
+#'
 #' @param mkinmod A list of class \code{\link{mkinmod}}, containing the kinetic
 #' model to be fitted to the data, or one of the shorthand names ("SFO",
 #' "FOMC", "DFOP", "HS", "SFORB", "IORE"). If a shorthand name is given, a
@@ -33,7 +33,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' as indicated by \code{fixed_parms}. If set to "auto", initial values for
 #' rate constants are set to default values. Using parameter names that are
 #' not in the model gives an error.
-#' 
+#'
 #' It is possible to only specify a subset of the parameters that the model
 #' needs. You can use the parameter lists "bparms.ode" from a previously
 #' fitted model, which contains the differential equation parameters from
@@ -105,10 +105,10 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' argument. The default value is 100.
 #' @param error_model If the error model is "const", a constant standard
 #' deviation is assumed.
-#' 
+#'
 #' If the error model is "obs", each observed variable is assumed to have its
 #' own variance.
-#' 
+#'
 #' If the error model is "tc" (two-component error model), a two-component
 #' error model similar to the one described by Rocke and Lorenzato (1995) is
 #' used for setting up the likelihood function. Note that this model
@@ -119,27 +119,27 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' the error model. If the error model is "const", unweighted nonlinear
 #' least squares fitting ("OLS") is selected. If the error model is "obs", or
 #' "tc", the "d_3" algorithm is selected.
-#' 
+#'
 #' The algorithm "d_3" will directly minimize the negative log-likelihood and
 #' - independently - also use the three step algorithm described below. The
 #' fit with the higher likelihood is returned.
-#' 
+#'
 #' The algorithm "direct" will directly minimize the negative log-likelihood.
-#' 
+#'
 #' The algorithm "twostep" will minimize the negative log-likelihood after an
 #' initial unweighted least squares optimisation step.
-#' 
+#'
 #' The algorithm "threestep" starts with unweighted least squares, then
 #' optimizes only the error model using the degradation model parameters
 #' found, and then minimizes the negative log-likelihood with free
 #' degradation and error model parameters.
-#' 
+#'
 #' The algorithm "fourstep" starts with unweighted least squares, then
 #' optimizes only the error model using the degradation model parameters
 #' found, then optimizes the degradation model again with fixed error model
 #' parameters, and finally minimizes the negative log-likelihood with free
 #' degradation and error model parameters.
-#' 
+#'
 #' The algorithm "IRLS" (Iteratively Reweighted Least Squares) starts with
 #' unweighted least squares, and then iterates optimization of the error
 #' model parameters and subsequent optimization of the degradation model
@@ -161,20 +161,20 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' @author Johannes Ranke
 #' @seealso Plotting methods \code{\link{plot.mkinfit}} and
 #' \code{\link{mkinparplot}}.
-#' 
+#'
 #' Comparisons of models fitted to the same data can be made using
 #' \code{\link{AIC}} by virtue of the method \code{\link{logLik.mkinfit}}.
-#' 
+#'
 #' Fitting of several models to several datasets in a single call to
 #' \code{\link{mmkin}}.
 #' @source Rocke, David M. and Lorenzato, Stefan (1995) A two-component model
 #' for measurement error in analytical chemistry. Technometrics 37(2), 176-184.
 #' @examples
-#' 
+#'
 #' # Use shorthand notation for parent only degradation
 #' fit <- mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE)
 #' summary(fit)
-#' 
+#'
 #' # One parent compound, one metabolite, both single first order.
 #' # Use mkinsub for convenience in model formulation. Pathway to sink included by default.
 #' SFO_SFO <- mkinmod(
@@ -192,7 +192,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' coef(fit.deSolve)
 #' endpoints(fit.deSolve)
 #' }
-#' 
+#'
 #' # Use stepwise fitting, using optimised parameters from parent only fit, FOMC
 #' \dontrun{
 #' FOMC_SFO <- mkinmod(
@@ -204,7 +204,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' fit.FOMC = mkinfit("FOMC", FOCUS_2006_D, quiet = TRUE)
 #' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, quiet = TRUE,
 #'   parms.ini = fit.FOMC$bparms.ode)
-#' 
+#'
 #' # Use stepwise fitting, using optimised parameters from parent only fit, SFORB
 #' SFORB_SFO <- mkinmod(
 #'   parent = list(type = "SFORB", to = "m1", sink = TRUE),
@@ -217,7 +217,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' fit.SFORB = mkinfit("SFORB", FOCUS_2006_D, quiet = TRUE)
 #' fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, parms.ini = fit.SFORB$bparms.ode, quiet = TRUE)
 #' }
-#' 
+#'
 #' \dontrun{
 #' # Weighted fits, including IRLS
 #' SFO_SFO.ff <- mkinmod(parent = mkinsub("SFO", "m1"),
@@ -229,8 +229,8 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' f.tc <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, error_model = "tc", quiet = TRUE)
 #' summary(f.tc)
 #' }
-#' 
-#' 
+#'
+#'
 #' @export
 mkinfit <- function(mkinmod, observed,
   parms.ini = "auto",
@@ -795,6 +795,8 @@ mkinfit <- function(mkinmod, observed,
 
   fit$hessian <- try(numDeriv::hessian(cost_function, c(degparms, errparms), OLS = FALSE,
     update_data = FALSE), silent = TRUE)
+  dimnames(fit$hessian) <- list(names(c(degparms, errparms)),
+    names(c(degparms, errparms)))
 
   # Backtransform parameters
   bparms.optim = backtransform_odeparms(fit$par, mkinmod,
@@ -805,6 +807,9 @@ mkinfit <- function(mkinmod, observed,
 
     fit$hessian_notrans <- try(numDeriv::hessian(cost_function, c(bparms.all, errparms),
       OLS = FALSE, trans = FALSE, update_data = FALSE), silent = TRUE)
+
+    dimnames(fit$hessian_notrans) <- list(names(c(bparms.all, errparms)),
+      names(c(bparms.all, errparms)))
   })
 
   fit$error_model_algorithm <- error_model_algorithm
@@ -839,7 +844,7 @@ mkinfit <- function(mkinmod, observed,
 
   # Log-likelihood with possibility to fix degparms or errparms
   fit$ll <- function(P, fixed_degparms = FALSE, fixed_errparms = FALSE) {
-    - cost_function(P, fixed_degparms = fixed_degparms,
+    - cost_function(P, trans = FALSE, fixed_degparms = fixed_degparms,
       fixed_errparms = fixed_errparms, OLS = FALSE, update_data = FALSE)
   }
 
diff --git a/R/parms.mkinfit.R b/R/parms.mkinfit.R
new file mode 100644
index 00000000..250d9d1f
--- /dev/null
+++ b/R/parms.mkinfit.R
@@ -0,0 +1,24 @@
+#' Extract model parameters from mkinfit models
+#'
+#' This function always returns degradation model parameters as well as error
+#' model parameters, in order to avoid working with a fitted model without
+#' considering the error structure that was assumed for the fit.
+#'
+#' @param object A fitted model object
+#' @param complete Unused argument for compatibility with the generic coef function from base R
+#' @return A numeric vector of fitted model parameters
+#' @export
+parms <- function(object, ...)
+{
+  UseMethod("parms", object)
+}
+
+#' @param transformed Should the parameters be returned
+#'   as used internally during the optimisation?
+#' @rdname parms
+#' @export
+parms.mkinfit <- function(object, transformed = FALSE, ...)
+{
+  if (transformed) object$par
+  else c(object$bparms.optim, object$errparms)
+}
--
cgit v1.2.1
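
Taken together, the two new methods are meant to be used on the same fitted object. The short sketch below combines the examples from the new roxygen documentation; it assumes a version of mkin that already contains this commit, and uses the FOCUS_2006_C dataset shipped with the package.

library(mkin)

# Fit a parent-only SFO model to FOCUS dataset C (as in the @examples above)
f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)

# Degradation and error model parameters, backtransformed (the default)
# and as used internally during the optimisation
parms(f)
parms(f, transformed = TRUE)

# Confidence intervals from the quadratic approximation and from profiling
# the likelihood; the profile method re-optimises the remaining parameters
# for each candidate value, so it takes noticeably longer
confint(f, method = "quadratic")
confint(f, method = "profile")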
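For readers who want to see what the 'quadratic' branch does without going through the mkin internals, the following standalone sketch applies the same recipe (invert the hessian of the negative log-likelihood, take the square roots of its diagonal as standard errors, add t quantiles) to a small simulated first-order decay dataset. Everything in it, the data, the nll() function and the parameter names, is made up for the illustration and is not part of mkin.

set.seed(123456)

# Simulated parent-only first-order decay with constant-variance error
t_obs <- c(0, 1, 3, 7, 14, 28, 56, 90)
parent_0 <- 100
k_true <- 0.05
sigma_true <- 3
y_obs <- parent_0 * exp(-k_true * t_obs) + rnorm(length(t_obs), sd = sigma_true)

# Negative log-likelihood in the parameter vector c(parent_0, k, sigma)
nll <- function(p) {
  if (p[2] <= 0 || p[3] <= 0) return(1e10)  # keep rate and sd positive
  pred <- p[1] * exp(-p[2] * t_obs)
  -sum(dnorm(y_obs, mean = pred, sd = p[3], log = TRUE))
}

# Maximum likelihood fit, asking optim() for the hessian at the optimum
fit <- optim(c(parent_0 = 90, k = 0.1, sigma = 1), nll, hessian = TRUE)

# Quadratic (Wald-type) intervals: invert the hessian, take square roots of
# the diagonal as standard errors, and add t quantiles as in the 'quadratic'
# branch of the new confint method
level <- 0.95
alpha <- 1 - level
covar <- solve(fit$hessian)
ses <- sqrt(diag(covar))
df_resid <- length(y_obs) - length(fit$par)
ci_quad <- fit$par + outer(ses, qt(c(alpha / 2, 1 - alpha / 2), df_resid))
dimnames(ci_quad) <- list(names(fit$par), c("2.5%", "97.5%"))
ci_quad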
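The 'profile' branch instead re-optimises all remaining parameters for each trial value of the parameter of interest and looks for the two points where the profile log-likelihood has dropped from its maximum by the cutoff 0.5 * qchisq(1 - alpha / 2, 1), using optimize() on either side of the estimate. Continuing the sketch above for the rate constant k (again an illustration of the approach, not the mkin code):

# Profile likelihood interval for k, continuing from the objects defined
# in the previous sketch
ll_max <- -fit$value
cutoff <- 0.5 * qchisq(1 - alpha / 2, 1)

# Re-optimise the nuisance parameters parent_0 and sigma for a fixed k
profile_ll <- function(k) {
  pll <- nlminb(fit$par[c("parent_0", "sigma")],
    function(p) nll(c(p[1], k, p[2])))
  -pll$objective
}

# Squared distance between the likelihood drop and the cutoff, minimised
# on either side of the estimate to locate the interval endpoints
ci_cost <- function(k) (cutoff - (ll_max - profile_ll(k)))^2

k_hat <- unname(fit$par["k"])
c(lower = optimize(ci_cost, lower = 0, upper = k_hat)$minimum,
  upper = optimize(ci_cost, lower = k_hat, upper = 15 * k_hat)$minimum)

For well-behaved likelihoods the two approaches give similar intervals; when the log-likelihood is clearly asymmetric around the optimum, the profile intervals are generally considered the more trustworthy of the two.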