From 32e38914d066c28db060e912b5df6c24470c9e14 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Fri, 21 Oct 2022 12:33:35 +0200 Subject: Add a simple anova method for model comparison Update docs --- NAMESPACE | 2 + R/anova.saem.mmkin.R | 103 +++++++++++++++++++++++++++++++++++++++++++++++ R/saem.R | 17 ++++++-- man/anova.saem.mmkin.Rd | 35 ++++++++++++++++ man/logLik.saem.mmkin.Rd | 14 +++++++ man/parhist.Rd | 3 ++ man/saem.Rd | 6 ++- 7 files changed, 175 insertions(+), 5 deletions(-) create mode 100644 R/anova.saem.mmkin.R create mode 100644 man/anova.saem.mmkin.Rd create mode 100644 man/logLik.saem.mmkin.Rd diff --git a/NAMESPACE b/NAMESPACE index 80fc927d..eb34304e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ S3method(AIC,mhmkin) S3method(AIC,mmkin) S3method(BIC,mhmkin) S3method(BIC,mmkin) +S3method(anova,saem.mmkin) S3method(aw,mixed.mmkin) S3method(aw,mkinfit) S3method(aw,mmkin) @@ -155,6 +156,7 @@ importFrom(rlang,"!!!") importFrom(stats,AIC) importFrom(stats,BIC) importFrom(stats,aggregate) +importFrom(stats,anova) importFrom(stats,as.formula) importFrom(stats,coef) importFrom(stats,coefficients) diff --git a/R/anova.saem.mmkin.R b/R/anova.saem.mmkin.R new file mode 100644 index 00000000..d617e180 --- /dev/null +++ b/R/anova.saem.mmkin.R @@ -0,0 +1,103 @@ +#' Anova method for saem.mmkin objects +#' +#' Generate an anova object. The method to calculate the BIC is that from +#' the saemix package. +#' +#' @param object An [saem.mmkin] object +#' @param ... further such objects +#' @param method Method for likelihood calculation: "is" (importance sampling), +#' "lin" (linear approximation), or "gq" (Gaussian quadrature). Passed +#' to [saemix::logLik.SaemixObject] +#' @param test Should a likelihood ratio test be performed? If TRUE, +#' the alternative models are tested against the first model. Should +#' only be done for nested models. +#' @param model.names Optional character vector of model names +#' @importFrom stats anova logLik update +#' @export +#' @return an "anova" data frame; the traditional (S3) result of anova() +anova.saem.mmkin <- function(object, ..., + method = c("is", "lin", "gq"), test = FALSE, model.names = NULL) +{ + # The following code is heavily inspired by anova.lmer in the lme4 package + mCall <- match.call(expand.dots = TRUE) + dots <- list(...) + method <- match.arg(method) + + is_model <- sapply(dots, is, "saem.mmkin") + if (any(is_model)) { + mods <- c(list(object), dots[is_model]) + + # Ensure same data, ignoring covariates + same_data <- sapply(dots[is_model], function(x) { + identical(object$data[c("ds", "name", "time", "value")], + x$data[c("ds", "name", "time", "value")]) + }) + if (!all(same_data)) { + stop(sum(!same_data), " objects have not been fitted to the same data") + } + + if (is.null(model.names)) + model.names <- vapply(as.list(mCall)[c(FALSE, TRUE, is_model)], deparse1, "") + + # Sanitize model names + if (length(model.names) != length(mods)) + stop("Model names vector and model list have different lengths") + + if (any(duplicated(model.names))) + stop("Duplicate model names are not allowed") + + if (max(nchar(model.names)) > 200) { + warning("Model names longer than 200 characters, assigning generic names") + model.names <- paste0("MODEL",seq_along(model.names)) + } + names(mods) <- model.names + + llks <- lapply(model.names, function(x) { + llk <- try(logLik(mods[[x]], method = method)) + if (inherits(llk, "try-error")) + stop("Could not obtain log likelihood with '", method, "' method for ", x) + return(llk) + }) + + # Order models by increasing degrees of freedom: + npar <- vapply(llks, attr, FUN.VALUE=numeric(1), "df") + ii <- order(npar) + mods <- mods[ii] + llks <- llks[ii] + npar <- npar[ii] + + # Describe data for the header, as in summary.saem.mmkin + header <- paste("Data:", nrow(object$data), "observations of", + length(unique(object$data$name)), "variable(s) grouped in", + length(unique(object$data$ds)), "datasets\n") + + # Calculate statistics + llk <- unlist(llks) + chisq <- 2 * pmax(0, c(NA, diff(llk))) + dfChisq <- c(NA, diff(npar)) + + bic <- function(x, method) { + BIC(x$so, method = method) + } + val <- data.frame( + npar = npar, + AIC = sapply(llks, AIC), + BIC = sapply(mods, bic, method = method), # We use the saemix method here + Lik = llk, + row.names = names(mods), check.names = FALSE) + + if (test) { + testval <- data.frame( + Chisq = chisq, + Df = dfChisq, + "Pr(>Chisq)" = ifelse(dfChisq == 0, NA, + pchisq(chisq, dfChisq, lower.tail = FALSE)), + row.names = names(mods), check.names = FALSE) + val <- cbind(val, testval) + } + class(val) <- c("anova", class(val)) + structure(val, heading = c(header)) + } else { + stop("Currently, no anova method is implemented for the case of a single saem.mmkin object") + } +} diff --git a/R/saem.R b/R/saem.R index 7c8e6444..249dd10c 100644 --- a/R/saem.R +++ b/R/saem.R @@ -64,10 +64,13 @@ utils::globalVariables(c("predicted", "std")) #' f_saem_sfo <- saem(f_mmkin_parent["SFO", ]) #' f_saem_fomc <- saem(f_mmkin_parent["FOMC", ]) #' f_saem_dfop <- saem(f_mmkin_parent["DFOP", ]) +#' anova(f_saem_sfo, f_saem_fomc, f_saem_dfop) +#' anova(f_saem_sfo, f_saem_dfop, test = TRUE) #' illparms(f_saem_dfop) -#' update(f_saem_dfop, covariance.model = diag(c(1, 1, 1, 0))) -#' AIC(f_saem_dfop) +#' f_saem_dfop_red <- update(f_saem_dfop, no_random_effect = "g_qlogis") +#' anova(f_saem_dfop, f_saem_dfop_red, test = TRUE) #' +#' anova(f_saem_sfo, f_saem_fomc, f_saem_dfop) #' # The returned saem.mmkin object contains an SaemixObject, therefore we can use #' # functions from saemix #' library(saemix) @@ -79,7 +82,7 @@ utils::globalVariables(c("predicted", "std")) #' #' f_mmkin_parent_tc <- update(f_mmkin_parent, error_model = "tc") #' f_saem_fomc_tc <- saem(f_mmkin_parent_tc["FOMC", ]) -#' compare.saemix(f_saem_fomc$so, f_saem_fomc_tc$so) +#' anova(f_saem_fomc, f_saem_fomc_tc, test = TRUE) #' #' sfo_sfo <- mkinmod(parent = mkinsub("SFO", "A1"), #' A1 = mkinsub("SFO")) @@ -712,8 +715,14 @@ saemix_data <- function(object, covariates = NULL, verbose = FALSE, ...) { return(res) } +#' logLik method for saem.mmkin objects +#' +#' @param method Passed to [saemix::logLik.SaemixObject] #' @export -logLik.saem.mmkin <- function(object, ...) return(logLik(object$so)) +logLik.saem.mmkin <- function(object, ..., method = c("lin", "is", "gq")) { + method <- match.arg(method) + return(logLik(object$so, method = method)) +} #' @export update.saem.mmkin <- function(object, ..., evaluate = TRUE) { diff --git a/man/anova.saem.mmkin.Rd b/man/anova.saem.mmkin.Rd new file mode 100644 index 00000000..8f9cb241 --- /dev/null +++ b/man/anova.saem.mmkin.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/anova.saem.mmkin.R +\name{anova.saem.mmkin} +\alias{anova.saem.mmkin} +\title{Anova method for saem.mmkin objects} +\usage{ +\method{anova}{saem.mmkin}( + object, + ..., + method = c("is", "lin", "gq"), + test = FALSE, + model.names = NULL +) +} +\arguments{ +\item{object}{An \link{saem.mmkin} object} + +\item{...}{further such objects} + +\item{method}{Method for likelihood calculation: "is" (importance sampling), +"lin" (linear approximation), or "gq" (Gaussian quadrature). Passed +to \link[saemix:logLik]{saemix::logLik.SaemixObject}} + +\item{test}{Should a likelihood ratio test be performed? If TRUE, +the alternative models are tested against the first model. Should +only be done for nested models.} + +\item{model.names}{Optional character vector of model names} +} +\value{ +an "anova" data frame; the traditional (S3) result of anova() +} +\description{ +Anova method for saem.mmkin objects +} diff --git a/man/logLik.saem.mmkin.Rd b/man/logLik.saem.mmkin.Rd new file mode 100644 index 00000000..8ea3426f --- /dev/null +++ b/man/logLik.saem.mmkin.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/saem.R +\name{logLik.saem.mmkin} +\alias{logLik.saem.mmkin} +\title{logLik method for saem.mmkin objects} +\usage{ +\method{logLik}{saem.mmkin}(object, ..., method = c("lin", "is", "gq")) +} +\arguments{ +\item{method}{Passed to \link[saemix:logLik]{saemix::logLik.SaemixObject}} +} +\description{ +logLik method for saem.mmkin objects +} diff --git a/man/parhist.Rd b/man/parhist.Rd index 67bbadad..f86ff734 100644 --- a/man/parhist.Rd +++ b/man/parhist.Rd @@ -6,6 +6,7 @@ \usage{ parhist( object, + llmin = -Inf, scale = c("best", "median"), lpos = "bottomleft", main = "", @@ -15,6 +16,8 @@ parhist( \arguments{ \item{object}{The \link{multistart} object} +\item{llmin}{The minimum likelihood of objects to be shown} + \item{scale}{By default, scale parameters using the best available fit. If 'median', parameters are scaled using the median parameters from all fits.} diff --git a/man/saem.Rd b/man/saem.Rd index a9c4a1bb..2b9199dd 100644 --- a/man/saem.Rd +++ b/man/saem.Rd @@ -19,6 +19,8 @@ saem(object, ...) conf.level = 0.6, solution_type = "auto", covariance.model = "auto", + covariates = NULL, + covariate_models = NULL, no_random_effect = NULL, nbiter.saemix = c(300, 100), control = list(displayProgress = FALSE, print = FALSE, nbiter.saemix = nbiter.saemix, @@ -38,13 +40,15 @@ saemix_model( degparms_start = numeric(), covariance.model = "auto", no_random_effect = NULL, + covariates = NULL, + covariate_models = NULL, test_log_parms = FALSE, conf.level = 0.6, verbose = FALSE, ... ) -saemix_data(object, verbose = FALSE, ...) +saemix_data(object, covariates = NULL, verbose = FALSE, ...) \method{parms}{saem.mmkin}(object, ci = FALSE, ...) } -- cgit v1.2.1