From 7091d3738e7e55acb20edb88772b228f6f5b6c98 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Thu, 31 Oct 2019 01:55:01 +0100 Subject: Add likelihood ratio test and other methods, fixes The likelihood ratio test method is lrtest, in addition, methods for update and residuals were added. --- R/confint.mkinfit.R | 3 ++- R/logLik.mkinfit.R | 9 ++++---- R/lrtest.mkinfit.R | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++ R/mkinfit.R | 52 ++++++++++++++++++++++++++++++++++------------ R/residuals.mkinfit.R | 31 ++++++++++++++++++++++++++++ R/update.mkinfit.R | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 191 insertions(+), 18 deletions(-) create mode 100644 R/lrtest.mkinfit.R create mode 100644 R/residuals.mkinfit.R create mode 100644 R/update.mkinfit.R (limited to 'R') diff --git a/R/confint.mkinfit.R b/R/confint.mkinfit.R index fadd14ae..5e1703d6 100644 --- a/R/confint.mkinfit.R +++ b/R/confint.mkinfit.R @@ -57,7 +57,8 @@ #' # c("k_parent_sink", "k_parent_m1", "k_m1_sink", "sigma"), cores = 1)) #' # If we exclude parent_0 (the confidence of which is often of minor interest), we get a nice #' # performance improvement from about 30 seconds to about 12 seconds -#' # system.time(ci_profile_no_parent_0 <- confint(f_d_1, c("k_parent_sink", "k_parent_m1", "k_m1_sink", "sigma"), cores = 4)) +#' # system.time(ci_profile_no_parent_0 <- confint(f_d_1, +#' # c("k_parent_sink", "k_parent_m1", "k_m1_sink", "sigma"), cores = 4)) #' ci_profile #' ci_quadratic_transformed <- confint(f_d_1, method = "quadratic") #' ci_quadratic_transformed diff --git a/R/logLik.mkinfit.R b/R/logLik.mkinfit.R index 4ec3d7d4..cadc0d0a 100644 --- a/R/logLik.mkinfit.R +++ b/R/logLik.mkinfit.R @@ -1,9 +1,10 @@ #' Calculated the log-likelihood of a fitted mkinfit object #' -#' This function simply calculates the product of the likelihood densities -#' calculated using \code{\link{dnorm}}, i.e. assuming normal distribution, -#' with of the mean predicted by the degradation model, and the standard -#' deviation predicted by the error model. +#' This function returns the product of the likelihood densities of each +#' observed value, as calculated as part of the fitting procedure using +#' \code{\link{dnorm}}, i.e. assuming normal distribution, and with the means +#' predicted by the degradation model, and the standard deviations predicted by +#' the error model. #' #' The total number of estimated parameters returned with the value of the #' likelihood is calculated as the sum of fitted degradation model parameters diff --git a/R/lrtest.mkinfit.R b/R/lrtest.mkinfit.R new file mode 100644 index 00000000..9c0a9039 --- /dev/null +++ b/R/lrtest.mkinfit.R @@ -0,0 +1,57 @@ +#' @importFrom lmtest lrtest +#' @export +lmtest::lrtest + +#' Likelihood ratio test for mkinfit models +#' +#' Compare two mkinfit models based on their likelihood. If two fitted +#' mkinfit objects are given as arguments, it is checked if they have been +#' fitted to the same data. It is the responsibility of the user to make sure +#' that the models are nested, i.e. one of them has less degrees of freedom +#' and can be expressed by fixing the parameters of the other. +#' +#' Alternatively, an argument to mkinfit can be given which is then passed +#' to \code{\link{update.mkinfit}} to obtain the alternative model. +#' +#' The comparison is then made by the \code{\link[lmtest]{lrtest.default}} +#' method from the lmtest package. The model with the higher number of fitted +#' parameters (alternative hypothesis) is listed first, then the model with the +#' lower number of fitted parameters (null hypothesis). +#' +#' @importFrom stats logLik update +#' @param object An \code{\link{mkinfit}} object +#' @param object_2 Optionally, another mkinfit object fitted to the same data. +#' @param \dots Argument to \code{\link{mkinfit}}, passed to +#' \code{\link{update.mkinfit}} for creating the alternative fitted object. +#' @examples +#' \dontrun{ +#' test_data <- subset(synthetic_data_for_UBA_2014[[12]]$data, name == "parent") +#' sfo_fit <- mkinfit("SFO", test_data, quiet = TRUE) +#' dfop_fit <- mkinfit("DFOP", test_data, quiet = TRUE) +#' lrtest(dfop_fit, sfo_fit) +#' lrtest(sfo_fit, dfop_fit) +#' lrtest(dfop_fit, error_model = "tc") +#' lrtest(dfop_fit, fixed_parms = c(k2 = 0)) +#' } +#' @export +lrtest.mkinfit <- function(object, object_2 = NULL, ...) { + + name_function <- function(x) { + paste(x$mkinmod$name, "with error model", x$err_mod) + } + + if (is.null(object_2)) { + object_2 <- update(object, ...) + } else { + data_object <- object$data[c("time", "variable", "observed")] + data_object_2 <- object_2$data[c("time", "variable", "observed")] + if (!identical(data_object, data_object_2)) { + stop("It seems that the mkinfit objects have not been fitted to the same data") + } + } + if (attr(logLik(object), "df") > attr(logLik(object_2), "df")) { + lmtest::lrtest.default(object, object_2, name = name_function) + } else { + lmtest::lrtest.default(object_2, object, name = name_function) + } +} diff --git a/R/mkinfit.R b/R/mkinfit.R index a3e39053..27c769db 100644 --- a/R/mkinfit.R +++ b/R/mkinfit.R @@ -52,7 +52,9 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value")) #' default values. Otherwise, inital values for all error model parameters #' must be given. #' @param fixed_parms The names of parameters that should not be optimised but -#' rather kept at the values specified in \code{parms.ini}. +#' rather kept at the values specified in \code{parms.ini}. Alternatively, +#' a named numeric vector of parameters to be fixed, regardless of the values +#' in parms.ini. #' @param fixed_initials The names of model variables for which the initial #' state at time 0 should be excluded from the optimisation. Defaults to all #' state variables except for the first one. @@ -253,6 +255,12 @@ mkinfit <- function(mkinmod, observed, trace_parms = FALSE, ...) { + call <- match.call() + + # Derive the name used for the model + if (is.character(mkinmod)) mkinmod_name <- mkinmod + else mkinmod_name <- deparse(substitute(mkinmod)) + # Check mkinmod and generate a model for the variable whith the highest value # if a suitable string is given parent_models_available = c("SFO", "FOMC", "DFOP", "HS", "SFORB", "IORE", "logistic") @@ -302,6 +310,14 @@ mkinfit <- function(mkinmod, observed, # Define starting values for parameters where not specified by the user if (parms.ini[[1]] == "auto") parms.ini = vector() + # Override parms.ini for parameters given as a numeric vector in + # fixed_parms + if (is.numeric(fixed_parms)) { + fixed_parm_names <- names(fixed_parms) + parms.ini[fixed_parm_names] <- fixed_parms + fixed_parms <- fixed_parm_names + } + # Warn for inital parameter specifications that are not in the model wrongpar.names <- setdiff(names(parms.ini), mkinmod$parms) if (length(wrongpar.names) > 0) { @@ -384,15 +400,22 @@ mkinfit <- function(mkinmod, observed, # Set default for state.ini if appropriate parent_name = names(mkinmod$spec)[[1]] + parent_time_0 = subset(observed, time == 0 & name == parent_name)$value + parent_time_0_mean = mean(parent_time_0, na.rm = TRUE) + if (is.na(parent_time_0_mean)) { + state.ini_auto = c(100, rep(0, length(mkinmod$diffs) - 1)) + } else { + state.ini_auto = c(parent_time_0_mean, rep(0, length(mkinmod$diffs) - 1)) + } + names(state.ini_auto) <- mod_vars + if (state.ini[1] == "auto") { - parent_time_0 = subset(observed, time == 0 & name == parent_name)$value - parent_time_0_mean = mean(parent_time_0, na.rm = TRUE) - if (is.na(parent_time_0_mean)) { - state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)) - } else { - state.ini = c(parent_time_0_mean, rep(0, length(mkinmod$diffs) - 1)) - } + state.ini_used <- state.ini_auto + } else { + state.ini_used <- state.ini_auto + state.ini_used[names(state.ini)] <- state.ini } + state.ini <- state.ini_used # Name the inital state variable values if they are not named yet if(is.null(names(state.ini))) names(state.ini) <- mod_vars @@ -799,19 +822,21 @@ mkinfit <- function(mkinmod, observed, names(c(degparms, errparms))) # Backtransform parameters - bparms.optim = backtransform_odeparms(fit$par, mkinmod, + bparms.optim = backtransform_odeparms(degparms, mkinmod, transform_rates = transform_rates, transform_fractions = transform_fractions) bparms.fixed = c(state.ini.fixed, parms.fixed) bparms.all = c(bparms.optim, parms.fixed) - fit$hessian_notrans <- try(numDeriv::hessian(cost_function, c(bparms.all, errparms), + fit$hessian_notrans <- try(numDeriv::hessian(cost_function, c(bparms.optim, errparms), OLS = FALSE, trans = FALSE, update_data = FALSE), silent = TRUE) - dimnames(fit$hessian_notrans) <- list(names(c(bparms.all, errparms)), - names(c(bparms.all, errparms))) + dimnames(fit$hessian_notrans) <- list(names(c(bparms.optim, errparms)), + names(c(bparms.optim, errparms))) }) + fit$call <- call + fit$error_model_algorithm <- error_model_algorithm if (fit$convergence != 0) { @@ -831,8 +856,9 @@ mkinfit <- function(mkinmod, observed, fit$calls <- calls fit$time <- fit_time - # We also need the model for summary and plotting + # We also need the model and a model name for summary and plotting fit$mkinmod <- mkinmod + fit$mkinmod$name <- mkinmod_name # We need data and predictions for summary and plotting fit$observed <- observed diff --git a/R/residuals.mkinfit.R b/R/residuals.mkinfit.R new file mode 100644 index 00000000..96bcf01c --- /dev/null +++ b/R/residuals.mkinfit.R @@ -0,0 +1,31 @@ +#' Extract residuals from an mkinfit model +#' +#' @param object An \code{\link{mkinfit}} object +#' @param standardized Should the residuals be standardized by dividing by the +#' standard deviation obtained from the fitted error model? +#' @param \dots Not used +#' @export +#' @examples +#' f <- mkinfit("DFOP", FOCUS_2006_C, quiet = TRUE) +#' residuals(f) +#' residuals(f, standardized = TRUE) +residuals.mkinfit <- function(object, standardized = FALSE, ...) { + res <- object$data[["residual"]] + if (standardized) { + if (object$err_mod == "const") { + sigma_fitted <- object$errparms["sigma"] + } + if (object$err_mod == "obs") { + sigma_names = paste0("sigma_", object$data[["variable"]]) + sigma_fitted <- object$errparms[sigma_names] + } + if (object$err_mod == "tc") { + sigma_fitted <- sigma_twocomp(object$data[["predicted"]], + sigma_low = object$errparms[1], + rsd_high = object$errparms[2]) + } + return(res / sigma_fitted) + } + return(res) +} + diff --git a/R/update.mkinfit.R b/R/update.mkinfit.R new file mode 100644 index 00000000..2f0814e0 --- /dev/null +++ b/R/update.mkinfit.R @@ -0,0 +1,57 @@ +#' Update an mkinfit model with different arguments +#' +#' This function will return an updated mkinfit object. The fitted degradation +#' model parameters from the old fit are used as starting values for the +#' updated fit. Values specified as 'parms.ini' and/or 'state.ini' will +#' override these starting values. +#' +#' @param object An mkinfit object to be updated +#' @param \dots Arguments to \code{\link{mkinfit}} that should replace +#' the arguments from the original call. Arguments set to NULL will +#' remove arguments given in the original call +#' @param evaluate Should the call be evaluated or returned as a call +#' @examples +#' \dontrun{ +#' fit <- mkinfit("DFOP", subset(FOCUS_2006_D, value != 0), quiet = TRUE) +#' update(fit, error_model = "tc") +#' } +#' @export +update.mkinfit <- function(object, ..., evaluate = TRUE) +{ + call <- object$call + + update_arguments <- match.call(expand.dots = FALSE)$... + + # Get optimised ODE parameters and let parms.ini override them + ode_optim_names <- intersect(names(object$bparms.optim), names(object$bparms.ode)) + ode_start <- object$bparms.optim[ode_optim_names] + if ("parms.ini" %in% names(update_arguments)) { + ode_start[names(update_arguments["parms.ini"])] <- update_arguments["parms.ini"] + } + if (length(ode_start)) update_arguments[["parms.ini"]] <- ode_start + + # Get optimised values for initial states and let state.ini override them + state_optim_names <- intersect(names(object$bparms.optim), paste0(names(object$bparms.state), "_0")) + state_start <- object$bparms.optim[state_optim_names] + names(state_start) <- gsub("_0$", "", names(state_start)) + if ("state.ini" %in% names(update_arguments)) { + state_start[names(update_arguments["state.ini"])] <- update_arguments["state.ini"] + } + if (length(state_start)) update_arguments[["state.ini"]] <- state_start + + if (length(update_arguments) > 0) { + update_arguments_in_call <- !is.na(match(names(update_arguments), names(call))) + + for (a in names(update_arguments)[update_arguments_in_call]) { + call[[a]] <- update_arguments[[a]] + } + + update_arguments_not_in_call <- !update_arguments_in_call + if(any(update_arguments_not_in_call)) { + call <- c(as.list(call), update_arguments[update_arguments_not_in_call]) + call <- as.call(call) + } + } + if(evaluate) eval(call, parent.frame()) + else call +} -- cgit v1.2.1