author     Johannes Ranke <jranke@uni-bremen.de>    2019-10-26 20:50:09 +0200
committer  Johannes Ranke <jranke@uni-bremen.de>    2019-10-26 20:50:09 +0200
commit     7b7729694363515007193d1c3e29e9b76271abb3 (patch)
tree       3aa572fb56d26d4ad53463e320ee9e75ff4a2321
parent     af2e1540cdad2fd00bb6216a38a754ff748629ad (diff)
parms and confint methods
The confint method can do profile likelihood based confidence intervals!
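
Usage sketch, following the @examples added in this commit (FOCUS_2006_C is one of the example datasets shipped with mkin; the comments are editorial, not part of the commit):

    f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)

    # parms() returns degradation and error model parameters together,
    # backtransformed by default
    parms(f)
    parms(f, transformed = TRUE)   # as used internally during optimisation

    # confint() offers the quadratic (Wald-type) approximation based on the
    # hessian stored in the fit, and profile likelihood based intervals
    confint(f, method = "quadratic")
    confint(f, method = "profile")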
-rw-r--r--  NAMESPACE                         |   3
-rw-r--r--  NEWS.md                           |   2
-rw-r--r--  R/confint.mkinfit.R               | 139
-rw-r--r--  R/mkinfit.R                       |  49
-rw-r--r--  R/parms.mkinfit.R                 |  24
-rw-r--r--  man/confint.mkinfit.Rd            |  56
-rw-r--r--  man/parms.Rd                      |  27
-rw-r--r--  tests/testthat/setup_script.R     |  33
-rw-r--r--  tests/testthat/test_confidence.R  | 149
9 files changed, 407 insertions, 75 deletions
diff --git a/NAMESPACE b/NAMESPACE
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,9 +2,11 @@
 S3method("[",mmkin)
 S3method(AIC,mmkin)
+S3method(confint,mkinfit)
 S3method(logLik,mkinfit)
 S3method(mkinpredict,mkinfit)
 S3method(mkinpredict,mkinmod)
+S3method(parms,mkinfit)
 S3method(plot,mkinfit)
 S3method(plot,mmkin)
 S3method(plot,nafta)
@@ -45,6 +47,7 @@
 export(mkinresplot)
 export(mkinsub)
 export(mmkin)
 export(nafta)
+export(parms)
 export(plot_err)
 export(plot_res)
 export(plot_sep)
diff --git a/NEWS.md b/NEWS.md
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,7 @@
 # mkin 0.9.49.6 (unreleased)
 
+- Add 'parms' and 'confint' methods for mkinfit objects. Confidence intervals based on the quadratic approximation as in the summary, and based on the profile likelihood
+
 - Move long-running tests to tests/testthat/slow with a separate test log. They currently take around 7 minutes on my system
 - 'mkinfit': Clean the code and return functions to calculate the log-likelihood and the sum of squared residuals
diff --git a/R/confint.mkinfit.R b/R/confint.mkinfit.R
new file mode 100644
index 00000000..887adc9f
--- /dev/null
+++ b/R/confint.mkinfit.R
@@ -0,0 +1,139 @@
+#' Confidence intervals for parameters of mkinfit objects
+#'
+#' @param object An \code{\link{mkinfit}} object
+#' @param parm A vector of names of the parameters which are to be given
+#'   confidence intervals. If missing, all parameters are considered.
+#' @param level The confidence level required
+#' @param alpha The allowed error probability, overrides 'level' if specified.
+#' @param method The 'profile' method searches the parameter space for the
+#'   cutoff of the confidence intervals by means of a likelihood ratio test.
+#'   The 'quadratic' method approximates the likelihood function at the
+#'   optimised parameters using the second term of the Taylor expansion, using
+#'   a second derivative (hessian) contained in the object.
+#' @param transformed If the quadratic approximation is used, should it be
+#'   applied to the likelihood based on the transformed parameters?
+#' @param backtransform If we approximate the likelihood in terms of the
+#'   transformed parameters, should we backtransform the parameters with
+#'   their confidence intervals?
+#' @param distribution For the quadratic approximation, should we use
+#'   the student t distribution or assume normal distribution for
+#'   the parameter estimate
+#' @param quiet Should we suppress messages?
+#' @return A matrix with columns giving lower and upper confidence limits for
+#'   each parameter.
+#' @references Pawitan Y (2013) In all likelihood - Statistical modelling and
+#'   inference using likelihood. Clarendon Press, Oxford.
+#' @examples
+#' f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
+#' confint(f, method = "quadratic")
+#' confint(f, method = "profile")
+#' @export
+confint.mkinfit <- function(object, parm,
+  level = 0.95, alpha = 1 - level,
+  method = c("profile", "quadratic"),
+  transformed = TRUE, backtransform = TRUE,
+  distribution = c("student_t", "normal"), quiet = FALSE, ...)
+{
+  tparms <- parms(object, transformed = TRUE)
+  bparms <- parms(object, transformed = FALSE)
+  tpnames <- names(tparms)
+  bpnames <- names(bparms)
+
+  return_pnames <- if (missing(parm)) {
+    if (backtransform) bpnames else tpnames
+  } else {
+    parm
+  }
+
+  p <- length(return_pnames)
+
+  method <- match.arg(method)
+
+  a <- c(alpha / 2, 1 - (alpha / 2))
+
+  if (method == "quadratic") {
+
+    distribution <- match.arg(distribution)
+
+    quantiles <- switch(distribution,
+      student_t = qt(a, object$df.residual),
+      normal = qnorm(a))
+
+    covar_pnames <- if (missing(parm)) {
+      if (transformed) tpnames else bpnames
+    } else {
+      parm
+    }
+
+    return_parms <- if (backtransform) bparms[return_pnames]
+      else tparms[return_pnames]
+
+    covar_parms <- if (transformed) tparms[covar_pnames]
+      else bparms[covar_pnames]
+
+    if (transformed) {
+      covar <- try(solve(object$hessian), silent = TRUE)
+    } else {
+      covar <- try(solve(object$hessian_notrans), silent = TRUE)
+    }
+
+    # If inverting the covariance matrix failed or produced NA values
+    if (!is.numeric(covar) | is.na(covar[1])) {
+      ses <- lci <- uci <- rep(NA, p)
+    } else {
+      ses <- sqrt(diag(covar))[covar_pnames]
+      lci <- covar_parms + quantiles[1] * ses
+      uci <- covar_parms + quantiles[2] * ses
+      if (backtransform) {
+        lci_back <- backtransform_odeparms(lci,
+          object$mkinmod, object$transform_rates, object$transform_fractions)
+        lci <- c(lci_back, lci[names(object$errparms)])
+        uci_back <- backtransform_odeparms(uci,
+          object$mkinmod, object$transform_rates, object$transform_fractions)
+        uci <- c(uci_back, uci[names(object$errparms)])
+      }
+    }
+  }
+
+  if (method == "profile") {
+    message("Profiling the likelihood")
+    lci <- uci <- rep(NA, p)
+    names(lci) <- names(uci) <- return_pnames
+
+    profile_pnames <- if(missing(parm)) names(parms(object))
+      else parm
+
+    # We do two-sided intervals based on the likelihood ratio
+    cutoff <- 0.5 * qchisq(1 - (alpha / 2), 1)
+
+    all_parms <- parms(object)
+
+    for (pname in profile_pnames)
+    {
+      pnames_free <- setdiff(names(all_parms), pname)
+      profile_ll <- function(x)
+      {
+        pll_cost <- function(P) {
+          parms_cost <- all_parms
+          parms_cost[pnames_free] <- P[pnames_free]
+          parms_cost[pname] <- x
+          - object$ll(parms_cost)
+        }
+        - nlminb(all_parms[pnames_free], pll_cost)$objective
+      }
+
+      cost <- function(x) {
+        (cutoff - (object$logLik - profile_ll(x)))^2
+      }
+
+      lci[pname] <- optimize(cost, lower = 0, upper = all_parms[pname])$minimum
+      uci[pname] <- optimize(cost, lower = all_parms[pname], upper = 15 * all_parms[pname])$minimum
+    }
+  }
+
+  ci <- cbind(lower = lci, upper = uci)
+  colnames(ci) <- paste0(
+    format(100 * a, trim = TRUE, scientific = FALSE, digits = 3), "%")
+
+  return(ci)
+}
diff --git a/R/mkinfit.R b/R/mkinfit.R
index 17fd59d0..a3e39053 100644
--- a/R/mkinfit.R
+++ b/R/mkinfit.R
@@ -1,7 +1,7 @@
 if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 
 #' Fit a kinetic model to data with one or more state variables
-#' 
+#'
 #' This function maximises the likelihood of the observed data using the Port
 #' algorithm \code{\link{nlminb}}, and the specified initial or fixed
 #' parameters and starting values. In each step of the optimisation, the
@@ -9,11 +9,11 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' parameters of the selected error model are fitted simultaneously with the
 #' degradation model parameters, as both of them are arguments of the
 #' likelihood function.
-#' 
+#'
 #' Per default, parameters in the kinetic models are internally transformed in
 #' order to better satisfy the assumption of a normal distribution of their
 #' estimators.
-#' 
+#'
 #' @param mkinmod A list of class \code{\link{mkinmod}}, containing the kinetic
 #'   model to be fitted to the data, or one of the shorthand names ("SFO",
 #'   "FOMC", "DFOP", "HS", "SFORB", "IORE"). If a shorthand name is given, a
@@ -33,7 +33,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #'   as indicated by \code{fixed_parms}. If set to "auto", initial values for
 #'   rate constants are set to default values. Using parameter names that are
 #'   not in the model gives an error.
-#' 
+#'
 #'   It is possible to only specify a subset of the parameters that the model
 #'   needs. You can use the parameter lists "bparms.ode" from a previously
 #'   fitted model, which contains the differential equation parameters from
@@ -105,10 +105,10 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #'   argument. The default value is 100.
 #' @param error_model If the error model is "const", a constant standard
 #'   deviation is assumed.
-#' 
+#'
 #'   If the error model is "obs", each observed variable is assumed to have its
 #'   own variance.
-#' 
+#'
 #'   If the error model is "tc" (two-component error model), a two component
 #'   error model similar to the one described by Rocke and Lorenzato (1995) is
 #'   used for setting up the likelihood function. Note that this model
@@ -119,27 +119,27 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #'   the error model. If the error model is "const", unweighted nonlinear
 #'   least squares fitting ("OLS") is selected. If the error model is "obs", or
 #'   "tc", the "d_3" algorithm is selected.
-#' 
+#'
 #'   The algorithm "d_3" will directly minimize the negative log-likelihood and
 #'   - independently - also use the three step algorithm described below. The
 #'   fit with the higher likelihood is returned.
-#' 
+#'
 #'   The algorithm "direct" will directly minimize the negative log-likelihood.
-#' 
+#'
 #'   The algorithm "twostep" will minimize the negative log-likelihood after an
 #'   initial unweighted least squares optimisation step.
-#' 
+#'
 #'   The algorithm "threestep" starts with unweighted least squares, then
 #'   optimizes only the error model using the degradation model parameters
 #'   found, and then minimizes the negative log-likelihood with free
 #'   degradation and error model parameters.
-#' 
+#'
 #'   The algorithm "fourstep" starts with unweighted least squares, then
 #'   optimizes only the error model using the degradation model parameters
 #'   found, then optimizes the degradation model again with fixed error model
 #'   parameters, and finally minimizes the negative log-likelihood with free
 #'   degradation and error model parameters.
-#' 
+#'
 #'   The algorithm "IRLS" (Iteratively Reweighted Least Squares) starts with
 #'   unweighted least squares, and then iterates optimization of the error
 #'   model parameters and subsequent optimization of the degradation model
@@ -161,20 +161,20 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' @author Johannes Ranke
 #' @seealso Plotting methods \code{\link{plot.mkinfit}} and
 #'   \code{\link{mkinparplot}}.
-#' 
+#'
 #'   Comparisons of models fitted to the same data can be made using
 #'   \code{\link{AIC}} by virtue of the method \code{\link{logLik.mkinfit}}.
-#' 
+#'
 #'   Fitting of several models to several datasets in a single call to
 #'   \code{\link{mmkin}}.
 #' @source Rocke, David M. und Lorenzato, Stefan (1995) A two-component model
 #'   for measurement error in analytical chemistry. Technometrics 37(2), 176-184.
 #' @examples
-#' 
+#'
 #' # Use shorthand notation for parent only degradation
 #' fit <- mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE)
 #' summary(fit)
-#' 
+#'
 #' # One parent compound, one metabolite, both single first order.
 #' # Use mkinsub for convenience in model formulation. Pathway to sink included per default.
 #' SFO_SFO <- mkinmod(
@@ -192,7 +192,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' coef(fit.deSolve)
 #' endpoints(fit.deSolve)
 #' }
-#' 
+#'
 #' # Use stepwise fitting, using optimised parameters from parent only fit, FOMC
 #' \dontrun{
 #' FOMC_SFO <- mkinmod(
@@ -204,7 +204,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' fit.FOMC = mkinfit("FOMC", FOCUS_2006_D, quiet = TRUE)
 #' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, quiet = TRUE,
 #'   parms.ini = fit.FOMC$bparms.ode)
-#' 
+#'
 #' # Use stepwise fitting, using optimised parameters from parent only fit, SFORB
 #' SFORB_SFO <- mkinmod(
 #'   parent = list(type = "SFORB", to = "m1", sink = TRUE),
@@ -217,7 +217,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' fit.SFORB = mkinfit("SFORB", FOCUS_2006_D, quiet = TRUE)
 #' fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, parms.ini = fit.SFORB$bparms.ode, quiet = TRUE)
 #' }
-#' 
+#'
 #' \dontrun{
 #' # Weighted fits, including IRLS
 #' SFO_SFO.ff <- mkinmod(parent = mkinsub("SFO", "m1"),
@@ -229,8 +229,8 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
 #' f.tc <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, error_model = "tc", quiet = TRUE)
 #' summary(f.tc)
 #' }
-#' 
-#' 
+#'
+#'
 #' @export
 mkinfit <- function(mkinmod, observed,
   parms.ini = "auto",
@@ -795,6 +795,8 @@ mkinfit <- function(mkinmod, observed,
   fit$hessian <- try(numDeriv::hessian(cost_function, c(degparms, errparms),
     OLS = FALSE, update_data = FALSE), silent = TRUE)
+  dimnames(fit$hessian) <- list(names(c(degparms, errparms)),
+    names(c(degparms, errparms)))
 
   # Backtransform parameters
   bparms.optim = backtransform_odeparms(fit$par, mkinmod,
@@ -805,6 +807,9 @@ mkinfit <- function(mkinmod, observed,
   fit$hessian_notrans <- try(numDeriv::hessian(cost_function, c(bparms.all, errparms),
     OLS = FALSE, trans = FALSE, update_data = FALSE), silent = TRUE)
+
+  dimnames(fit$hessian_notrans) <- list(names(c(bparms.all, errparms)),
+    names(c(bparms.all, errparms)))
 })
 
 fit$error_model_algorithm <- error_model_algorithm
@@ -839,7 +844,7 @@ mkinfit <- function(mkinmod, observed,
   # Log-likelihood with possibility to fix degparms or errparms
   fit$ll <- function(P, fixed_degparms = FALSE, fixed_errparms = FALSE) {
-    - cost_function(P, fixed_degparms = fixed_degparms,
+    - cost_function(P, trans = FALSE, fixed_degparms = fixed_degparms,
       fixed_errparms = fixed_errparms, OLS = FALSE, update_data = FALSE)
   }
diff --git a/R/parms.mkinfit.R b/R/parms.mkinfit.R
new file mode 100644
index 00000000..250d9d1f
--- /dev/null
+++ b/R/parms.mkinfit.R
@@ -0,0 +1,24 @@
+#' Extract model parameters from mkinfit models
+#'
+#' This function always returns degradation model parameters as well as error
+#' model parameters, in order to avoid working with a fitted model without
+#' considering the error structure that was assumed for the fit.
+#'
+#' @param object A fitted model object
+#' @param complete Unused argument for compatibility with the generic coef function from base R
+#' @return A numeric vector of fitted model parameters
+#' @export
+parms <- function(object, ...)
+{
+  UseMethod("parms", object)
+}
+
+#' @param transformed Should the parameters be returned
+#'   as used internally during the optimisation?
+#' @rdname parms
+#' @export
+parms.mkinfit <- function(object, transformed = FALSE, ...)
+{
+  if (transformed) object$par
+  else c(object$bparms.optim, object$errparms)
+}
diff --git a/man/confint.mkinfit.Rd b/man/confint.mkinfit.Rd
new file mode 100644
index 00000000..94f55d14
--- /dev/null
+++ b/man/confint.mkinfit.Rd
@@ -0,0 +1,56 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/confint.mkinfit.R
+\name{confint.mkinfit}
+\alias{confint.mkinfit}
+\title{Confidence intervals for parameters of mkinfit objects}
+\usage{
+\method{confint}{mkinfit}(object, parm, level = 0.95, alpha = 1 -
+  level, method = c("profile", "quadratic"), transformed = TRUE,
+  backtransform = TRUE, distribution = c("student_t", "normal"),
+  quiet = FALSE, ...)
+}
+\arguments{
+\item{object}{An \code{\link{mkinfit}} object}
+
+\item{parm}{A vector of names of the parameters which are to be given
+confidence intervals. If missing, all parameters are considered.}
+
+\item{level}{The confidence level required}
+
+\item{alpha}{The allowed error probability, overrides 'level' if specified.}
+
+\item{method}{The 'profile' method searches the parameter space for the
+cutoff of the confidence intervals by means of a likelihood ratio test.
+The 'quadratic' method approximates the likelihood function at the
+optimised parameters using the second term of the Taylor expansion, using
+a second derivative (hessian) contained in the object.}
+
+\item{transformed}{If the quadratic approximation is used, should it be
+applied to the likelihood based on the transformed parameters?}
+
+\item{backtransform}{If we approximate the likelihood in terms of the
+transformed parameters, should we backtransform the parameters with
+their confidence intervals?}
+
+\item{distribution}{For the quadratic approximation, should we use
+the student t distribution or assume normal distribution for
+the parameter estimate}
+
+\item{quiet}{Should we suppress messages?}
+}
+\value{
+A matrix with columns giving lower and upper confidence limits for
+  each parameter.
+}
+\description{
+Confidence intervals for parameters of mkinfit objects
+}
+\examples{
+f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
+confint(f, method = "quadratic")
+confint(f, method = "profile")
+}
+\references{
+Pawitan Y (2013) In all likelihood - Statistical modelling and
+  inference using likelihood. Clarendon Press, Oxford.
+}
diff --git a/man/parms.Rd b/man/parms.Rd
new file mode 100644
index 00000000..6de52557
--- /dev/null
+++ b/man/parms.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parms.mkinfit.R
+\name{parms}
+\alias{parms}
+\alias{parms.mkinfit}
+\title{Extract model parameters from mkinfit models}
+\usage{
+parms(object, ...)
+
+\method{parms}{mkinfit}(object, transformed = FALSE, ...)
+}
+\arguments{
+\item{object}{A fitted model object}
+
+\item{transformed}{Should the parameters be returned
+as used internally during the optimisation?}
+
+\item{complete}{Unused argument for compatibility with the generic coef function from base R}
+}
+\value{
+A numeric vector of fitted model parameters
+}
+\description{
+This function always returns degradation model parameters as well as error
+model parameters, in order to avoid working with a fitted model without
+considering the error structure that was assumed for the fit.
+}
diff --git a/tests/testthat/setup_script.R b/tests/testthat/setup_script.R
index 51fea4f6..c3fb4f06 100644
--- a/tests/testthat/setup_script.R
+++ b/tests/testthat/setup_script.R
@@ -1,21 +1,3 @@
-# Copyright (C) 2016-2019 Johannes Ranke
-# Contact: jranke@uni-bremen.de
-
-# This file is part of the R package mkin
-
-# mkin is free software: you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the Free Software
-# Foundation, either version 3 of the License, or (at your option) any later
-# version.
-
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-# details.
-
-# You should have received a copy of the GNU General Public License along with
-# this program. If not, see <http://www.gnu.org/licenses/>
-
 require(mkin)
 require(testthat)
@@ -76,5 +58,16 @@ f_SFO_lin_mkin_OLS <- mkinfit(m_synth_SFO_lin, SFO_lin_a, quiet = TRUE)
 f_SFO_lin_mkin_ML <- mkinfit(m_synth_SFO_lin, SFO_lin_a, quiet = TRUE,
   error_model = "const", error_model_algorithm = "direct")
 
-fit_obs_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "obs", quiet = TRUE)
-fit_tc_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "tc", quiet = TRUE)
+# We know direct optimization is OK and direct needs 4 sec versus 5.5 for threestep and 6 for IRLS
+fit_obs_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "obs", quiet = TRUE,
+  error_model_algorithm = "direct")
+# We know threestep is OK, and threestep (and IRLS) need 4.8 sec versus 5.6 for direct
+fit_tc_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "tc", quiet = TRUE,
+  error_model_algorithm = "threestep")
+
+# We know direct optimization is OK and direct needs 8 sec versus 11 sec for threestep
+f_tc_2 <- mkinfit(m_synth_DFOP_par, DFOP_par_c, error_model = "tc",
+  error_model_algorithm = "direct", quiet = TRUE)
+
+f_tc_2_ntf <- mkinfit(m_synth_DFOP_par, DFOP_par_c, error_model = "tc",
+  transform_fractions = FALSE, error_model_algorithm = "direct", quiet = TRUE)
diff --git a/tests/testthat/test_confidence.R b/tests/testthat/test_confidence.R
index 0423302b..68ff4a98 100644
--- a/tests/testthat/test_confidence.R
+++ b/tests/testthat/test_confidence.R
@@ -1,40 +1,123 @@
-# Copyright (C) 2019 Johannes Ranke
-# Contact: jranke@uni-bremen.de
+context("Confidence intervals and p-values")
 
-# This file is part of the R package mkin
+test_that("The confint method 'quadratic' is consistent with the summary", {
+  expect_equivalent(
+    confint(f_SFO_lin_mkin_ML, method = "quadratic"),
+    summary(f_SFO_lin_mkin_ML)$bpar[, c("Lower", "Upper")])
 
-# mkin is free software: you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the Free Software
-# Foundation, either version 3 of the License, or (at your option) any later
-# version.
+  expect_equivalent(
+    confint(f_SFO_lin_mkin_ML, method = "quadratic", backtransform = FALSE),
+    summary(f_SFO_lin_mkin_ML)$par[, c("Lower", "Upper")])
 
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-# details.
+  f_notrans <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE, transform_rates = FALSE)
+  expect_equivalent(
+    confint(f_notrans, method = "quadratic", transformed = FALSE),
+    summary(f_notrans)$par[, c("Lower", "Upper")])
 
-# You should have received a copy of the GNU General Public License along with
-# this program. If not, see <http://www.gnu.org/licenses/>
+  expect_equivalent(
+    confint(f_notrans, method = "quadratic", transformed = FALSE),
+    summary(f_notrans)$bpar[, c("Lower", "Upper")])
 
-context("Confidence intervals and p-values")
+})
+
+test_that("Quadratic confidence intervals for rate constants are comparable to confint.nls", {
+
+  SFO_trans <- function(t, parent_0, log_k_parent_sink) {
+    parent_0 * exp(- exp(log_k_parent_sink) * t)
+  }
+  SFO_notrans <- function(t, parent_0, k_parent_sink) {
+    parent_0 * exp(- k_parent_sink * t)
+  }
+  f_1_nls_trans <- nls(value ~ SFO_trans(time, parent_0, log_k_parent_sink),
+    data = FOCUS_2006_A,
+    start = list(parent_0 = 100, log_k_parent_sink = log(0.1)))
+  f_1_nls_notrans <- nls(value ~ SFO_notrans(time, parent_0, k_parent_sink),
+    data = FOCUS_2006_A,
+    start = list(parent_0 = 100, k_parent_sink = 0.1))
+
+  f_1_mkin_OLS <- mkinfit("SFO", FOCUS_2006_A, quiet = TRUE)
+  f_1_mkin_OLS_notrans <- mkinfit("SFO", FOCUS_2006_A, quiet = TRUE,
+    transform_rates = FALSE)
+
+  # Check fitted parameter values
+  expect_equivalent(
+    (f_1_mkin_OLS$bparms.optim - coef(f_1_nls_notrans))/f_1_mkin_OLS$bparms.optim,
+    rep(0, 2), tolerance = 1e-6)
+  expect_equivalent(
+    (f_1_mkin_OLS$par[1:2] - coef(f_1_nls_trans))/f_1_mkin_OLS$par[1:2],
+    rep(0, 2), tolerance = 1e-6)
+
+  # Check the standard error for the transformed parameters
+  se_nls <- summary(f_1_nls_trans)$coefficients[, "Std. Error"]
+  # This is of similar magnitude as the standard error obtained with mkin
+  se_mkin <- summary(f_1_mkin_OLS)$par[1:2, "Std. Error"]
+
+  se_nls_notrans <- summary(f_1_nls_notrans)$coefficients[, "Std. Error"]
+  # This is also of similar magnitude as the standard error obtained with mkin
+  se_mkin_notrans <- summary(f_1_mkin_OLS_notrans)$par[1:2, "Std. Error"]
+
+  # The difference can partly be explained by the ratio between
+  # the maximum likelihood estimate of the standard error sqrt(rss/n)
+  # and the estimate used in nls sqrt(rss/rdf), i.e. by the factor
+  # sqrt(n/rdf).
+  # In the case of mkin, the residual degrees of freedom are only taken into
+  # account in the subsequent step of generating the confidence intervals for
+  # the parameters (including sigma)
+
+  # Strangely, this only works for the rate constant to less than 1%, but
+  # not for the initial estimate
+  expect_equivalent(se_nls[2] / se_mkin[2], sqrt(8/5), tolerance = 0.01)
+  expect_equivalent(se_nls_notrans[2] / se_mkin_notrans[2], sqrt(8/5), tolerance = 0.01)
+
+  # Another case:
+  f_2_mkin <- mkinfit("DFOP", FOCUS_2006_C, quiet = TRUE)
+  f_2_nls <- nls(value ~ SSbiexp(time, A1, lrc1, A2, lrc2), data = FOCUS_2006_C)
+  se_mkin_2 <- summary(f_2_mkin)$par[1:4, "Std. Error"]
+  se_nls_2 <- summary(f_2_nls)$coefficients[, "Std. Error"]
Error"] + # Here we the ratio of standard errors can be explained by the same + # principle up to about 3% + expect_equivalent( + se_nls_2[c("lrc1", "lrc2")] / se_mkin_2[c("log_k1", "log_k2")], + rep(sqrt(nrow(FOCUS_2006_C) / (nrow(FOCUS_2006_C) - 4)), 2), + tolerance = 0.03) +} + +test_that("Likelihood profile based confidence intervals work", { -test_that("Confidence intervals are stable", { - bpar_1 <- summary(f_SFO_lin_mkin_ML)$bpar[, c("Estimate", "Lower", "Upper")] - # The reference used here is mkin 0.9.48.1 - bpar_1_mkin_0.9 <- read.table(text = -"parent_0 102.0000 98.6000 106.0000 -k_parent 0.7390 0.6770 0.8070 -k_M1 0.2990 0.2560 0.3490 -k_M2 0.0202 0.0176 0.0233 -f_parent_to_M1 0.7690 0.6640 0.8480 -f_M1_to_M2 0.7230 0.6030 0.8180", -col.names = c("parameter", "estimate", "lower", "upper")) - - expect_equivalent(signif(bpar_1[1:6, "Estimate"], 3), bpar_1_mkin_0.9$estimate) - - # Relative difference of lower bound of the confidence interval is < 0.02 - expect_equivalent(bpar_1[1:6, "Lower"], bpar_1_mkin_0.9$lower, - scale = bpar_1_mkin_0.9$lower, tolerance = 0.02) - expect_equivalent(f_SFO_lin_mkin_OLS$bpar, f_SFO_lin_mkin_ML$bpar) - }) +# f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE) +# f <- fits[["SFO", "FOCUS_C"]] +# f_notrans <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE, transform_rates = FALSE) +# pf <- parms(f) +# f_nll <- function(parent_0, k_parent_sink, sigma) { +# - f$ll(c(parent_0 = as.numeric(parent_0), +# k_parent_sink = as.numeric(k_parent_sink), +# sigma = as.numeric(sigma))) +# } +# f_nll(parent_0 = 100, k_parent_sink = 0.3, sigma = 4.7) +# f_nll(pf[1], pf[2], pf[3]) +# f_mle <- stats4::mle(f_nll, start = as.list(parms(f)), nobs = nrow(FOCUS_2006_C)) +# f_mle <- stats4::mle(f_nll, start = as.list(parms(f)), nobs = 17L) +# logLik(f_mle) +# stats4::confint(f_mle, nobs = nrow(FOCUS_2006_C)) +# +# ci_mkin_1 <- confint(f, +# method = "quadratic", backtransform = FALSE, +# transformed = TRUE) +# ci_maxLik_1 <- maxLik::confint.maxLik(f_maxLik) +# +# f_tc_2_maxLik <- maxLik::maxLik(f_tc_2$ll, +# start = f_tc_2$par) +# ci_tc_2 <- confint(f_tc_2, method = "quadratic", +# backtransform = FALSE, transformed = TRUE, +# distribution = "normal") +# ci_tc_2_maxLik <- confint(f_tc_2_maxLik) +# rel_diff_ci_tc_2 <- (ci_tc_2 - ci_tc_2_maxLik)/ci_tc_2_maxLik +# # The ilr transformed variable appear to misbehave in the numeric differentiation +# expect_equivalent( +# rel_diff_ci_tc_2[c(2, 3, 4, 6, 7, 9, 10), ], rep(0, 14), +# tolerance = 0.05) +# +# ci_tc_2[, 1] +# ci_tc_2_maxLik[, 1] +}) |