From 32e38914d066c28db060e912b5df6c24470c9e14 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Fri, 21 Oct 2022 12:33:35 +0200 Subject: Add a simple anova method for model comparison Update docs --- R/anova.saem.mmkin.R | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++ R/saem.R | 17 +++++++-- 2 files changed, 116 insertions(+), 4 deletions(-) create mode 100644 R/anova.saem.mmkin.R (limited to 'R') diff --git a/R/anova.saem.mmkin.R b/R/anova.saem.mmkin.R new file mode 100644 index 00000000..d617e180 --- /dev/null +++ b/R/anova.saem.mmkin.R @@ -0,0 +1,103 @@ +#' Anova method for saem.mmkin objects +#' +#' Generate an anova object. The method to calculate the BIC is that from +#' the saemix package. +#' +#' @param object An [saem.mmkin] object +#' @param ... further such objects +#' @param method Method for likelihood calculation: "is" (importance sampling), +#' "lin" (linear approximation), or "gq" (Gaussian quadrature). Passed +#' to [saemix::logLik.SaemixObject] +#' @param test Should a likelihood ratio test be performed? If TRUE, +#' the alternative models are tested against the first model. Should +#' only be done for nested models. +#' @param model.names Optional character vector of model names +#' @importFrom stats anova logLik update +#' @export +#' @return an "anova" data frame; the traditional (S3) result of anova() +anova.saem.mmkin <- function(object, ..., + method = c("is", "lin", "gq"), test = FALSE, model.names = NULL) +{ + # The following code is heavily inspired by anova.lmer in the lme4 package + mCall <- match.call(expand.dots = TRUE) + dots <- list(...) + method <- match.arg(method) + + is_model <- sapply(dots, is, "saem.mmkin") + if (any(is_model)) { + mods <- c(list(object), dots[is_model]) + + # Ensure same data, ignoring covariates + same_data <- sapply(dots[is_model], function(x) { + identical(object$data[c("ds", "name", "time", "value")], + x$data[c("ds", "name", "time", "value")]) + }) + if (!all(same_data)) { + stop(sum(!same_data), " objects have not been fitted to the same data") + } + + if (is.null(model.names)) + model.names <- vapply(as.list(mCall)[c(FALSE, TRUE, is_model)], deparse1, "") + + # Sanitize model names + if (length(model.names) != length(mods)) + stop("Model names vector and model list have different lengths") + + if (any(duplicated(model.names))) + stop("Duplicate model names are not allowed") + + if (max(nchar(model.names)) > 200) { + warning("Model names longer than 200 characters, assigning generic names") + model.names <- paste0("MODEL",seq_along(model.names)) + } + names(mods) <- model.names + + llks <- lapply(model.names, function(x) { + llk <- try(logLik(mods[[x]], method = method)) + if (inherits(llk, "try-error")) + stop("Could not obtain log likelihood with '", method, "' method for ", x) + return(llk) + }) + + # Order models by increasing degrees of freedom: + npar <- vapply(llks, attr, FUN.VALUE=numeric(1), "df") + ii <- order(npar) + mods <- mods[ii] + llks <- llks[ii] + npar <- npar[ii] + + # Describe data for the header, as in summary.saem.mmkin + header <- paste("Data:", nrow(object$data), "observations of", + length(unique(object$data$name)), "variable(s) grouped in", + length(unique(object$data$ds)), "datasets\n") + + # Calculate statistics + llk <- unlist(llks) + chisq <- 2 * pmax(0, c(NA, diff(llk))) + dfChisq <- c(NA, diff(npar)) + + bic <- function(x, method) { + BIC(x$so, method = method) + } + val <- data.frame( + npar = npar, + AIC = sapply(llks, AIC), + BIC = sapply(mods, bic, method = method), # We use the saemix method here + Lik = llk, + row.names = names(mods), check.names = FALSE) + + if (test) { + testval <- data.frame( + Chisq = chisq, + Df = dfChisq, + "Pr(>Chisq)" = ifelse(dfChisq == 0, NA, + pchisq(chisq, dfChisq, lower.tail = FALSE)), + row.names = names(mods), check.names = FALSE) + val <- cbind(val, testval) + } + class(val) <- c("anova", class(val)) + structure(val, heading = c(header)) + } else { + stop("Currently, no anova method is implemented for the case of a single saem.mmkin object") + } +} diff --git a/R/saem.R b/R/saem.R index 7c8e6444..249dd10c 100644 --- a/R/saem.R +++ b/R/saem.R @@ -64,10 +64,13 @@ utils::globalVariables(c("predicted", "std")) #' f_saem_sfo <- saem(f_mmkin_parent["SFO", ]) #' f_saem_fomc <- saem(f_mmkin_parent["FOMC", ]) #' f_saem_dfop <- saem(f_mmkin_parent["DFOP", ]) +#' anova(f_saem_sfo, f_saem_fomc, f_saem_dfop) +#' anova(f_saem_sfo, f_saem_dfop, test = TRUE) #' illparms(f_saem_dfop) -#' update(f_saem_dfop, covariance.model = diag(c(1, 1, 1, 0))) -#' AIC(f_saem_dfop) +#' f_saem_dfop_red <- update(f_saem_dfop, no_random_effect = "g_qlogis") +#' anova(f_saem_dfop, f_saem_dfop_red, test = TRUE) #' +#' anova(f_saem_sfo, f_saem_fomc, f_saem_dfop) #' # The returned saem.mmkin object contains an SaemixObject, therefore we can use #' # functions from saemix #' library(saemix) @@ -79,7 +82,7 @@ utils::globalVariables(c("predicted", "std")) #' #' f_mmkin_parent_tc <- update(f_mmkin_parent, error_model = "tc") #' f_saem_fomc_tc <- saem(f_mmkin_parent_tc["FOMC", ]) -#' compare.saemix(f_saem_fomc$so, f_saem_fomc_tc$so) +#' anova(f_saem_fomc, f_saem_fomc_tc, test = TRUE) #' #' sfo_sfo <- mkinmod(parent = mkinsub("SFO", "A1"), #' A1 = mkinsub("SFO")) @@ -712,8 +715,14 @@ saemix_data <- function(object, covariates = NULL, verbose = FALSE, ...) { return(res) } +#' logLik method for saem.mmkin objects +#' +#' @param method Passed to [saemix::logLik.SaemixObject] #' @export -logLik.saem.mmkin <- function(object, ...) return(logLik(object$so)) +logLik.saem.mmkin <- function(object, ..., method = c("lin", "is", "gq")) { + method <- match.arg(method) + return(logLik(object$so, method = method)) +} #' @export update.saem.mmkin <- function(object, ..., evaluate = TRUE) { -- cgit v1.2.1