From 32e38914d066c28db060e912b5df6c24470c9e14 Mon Sep 17 00:00:00 2001
From: Johannes Ranke <jranke@uni-bremen.de>
Date: Fri, 21 Oct 2022 12:33:35 +0200
Subject: Add a simple anova method for model comparison

Update docs
---
 R/anova.saem.mmkin.R | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++
 R/saem.R             |  17 +++++++--
 2 files changed, 116 insertions(+), 4 deletions(-)
 create mode 100644 R/anova.saem.mmkin.R

(limited to 'R')

diff --git a/R/anova.saem.mmkin.R b/R/anova.saem.mmkin.R
new file mode 100644
index 00000000..d617e180
--- /dev/null
+++ b/R/anova.saem.mmkin.R
@@ -0,0 +1,103 @@
+#' Anova method for saem.mmkin objects
+#'
+#' Generate an anova object. The method to calculate the BIC is that from
+#' the saemix package.
+#'
+#' @param object An [saem.mmkin] object
+#' @param ...   further such objects
+#' @param method Method for likelihood calculation: "is" (importance sampling),
+#' "lin" (linear approximation), or "gq" (Gaussian quadrature). Passed
+#' to [saemix::logLik.SaemixObject]
+#' @param test Should a likelihood ratio test be performed? If TRUE,
+#' the alternative models are tested against the first model. Should
+#' only be done for nested models.
+#' @param model.names Optional character vector of model names
+#' @importFrom stats anova logLik update
+#' @export
+#' @return an "anova" data frame; the traditional (S3) result of anova()
+anova.saem.mmkin <- function(object, ...,
+  method = c("is", "lin", "gq"), test = FALSE, model.names = NULL)
+{
+  # The following code is heavily inspired by anova.lmer in the lme4 package
+  mCall <- match.call(expand.dots = TRUE)
+  dots <- list(...)
+  method <- match.arg(method)
+
+  is_model <- sapply(dots, is, "saem.mmkin")
+  if (any(is_model)) {
+    mods <- c(list(object), dots[is_model])
+
+    # Ensure same data, ignoring covariates
+    same_data <- sapply(dots[is_model], function(x) {
+      identical(object$data[c("ds", "name", "time", "value")],
+        x$data[c("ds", "name", "time", "value")])
+    })
+    if (!all(same_data)) {
+      stop(sum(!same_data), " objects have not been fitted to the same data")
+    }
+
+    if (is.null(model.names))
+        model.names <- vapply(as.list(mCall)[c(FALSE, TRUE, is_model)], deparse1, "")
+
+    # Sanitize model names
+    if (length(model.names) != length(mods))
+      stop("Model names vector and model list have different lengths")
+
+    if (any(duplicated(model.names)))
+      stop("Duplicate model names are not allowed")
+
+    if (max(nchar(model.names)) > 200) {
+      warning("Model names longer than 200 characters, assigning generic names")
+      model.names <- paste0("MODEL",seq_along(model.names))
+    }
+    names(mods) <- model.names
+
+    llks <- lapply(model.names, function(x) {
+      llk <- try(logLik(mods[[x]], method = method))
+      if (inherits(llk, "try-error"))
+        stop("Could not obtain log likelihood with '", method, "' method for ", x)
+      return(llk)
+    })
+
+    # Order models by increasing degrees of freedom:
+    npar <- vapply(llks, attr, FUN.VALUE=numeric(1), "df")
+    ii <- order(npar)
+    mods <- mods[ii]
+    llks <- llks[ii]
+    npar   <- npar[ii]
+
+    # Describe data for the header, as in summary.saem.mmkin
+    header <- paste("Data:", nrow(object$data), "observations of",
+      length(unique(object$data$name)), "variable(s) grouped in",
+      length(unique(object$data$ds)), "datasets\n")
+
+    # Calculate statistics
+    llk <- unlist(llks)
+    chisq <- 2 * pmax(0, c(NA, diff(llk)))
+    dfChisq <- c(NA, diff(npar))
+
+    bic <- function(x, method) {
+      BIC(x$so, method = method)
+    }
+    val <- data.frame(
+      npar = npar,
+      AIC = sapply(llks, AIC),
+      BIC = sapply(mods, bic, method = method), # We use the saemix method here
+      Lik = llk,
+      row.names = names(mods), check.names = FALSE)
+
+    if (test) {
+      testval <- data.frame(
+        Chisq = chisq,
+        Df = dfChisq,
+        "Pr(>Chisq)" = ifelse(dfChisq == 0, NA,
+          pchisq(chisq, dfChisq, lower.tail = FALSE)),
+        row.names = names(mods), check.names = FALSE)
+      val <- cbind(val, testval)
+    }
+    class(val) <- c("anova", class(val))
+    structure(val, heading = c(header))
+  } else {
+    stop("Currently, no anova method is implemented for the case of a single saem.mmkin object")
+  }
+}
diff --git a/R/saem.R b/R/saem.R
index 7c8e6444..249dd10c 100644
--- a/R/saem.R
+++ b/R/saem.R
@@ -64,10 +64,13 @@ utils::globalVariables(c("predicted", "std"))
 #' f_saem_sfo <- saem(f_mmkin_parent["SFO", ])
 #' f_saem_fomc <- saem(f_mmkin_parent["FOMC", ])
 #' f_saem_dfop <- saem(f_mmkin_parent["DFOP", ])
+#' anova(f_saem_sfo, f_saem_fomc, f_saem_dfop)
+#' anova(f_saem_sfo, f_saem_dfop, test = TRUE)
 #' illparms(f_saem_dfop)
-#' update(f_saem_dfop, covariance.model = diag(c(1, 1, 1, 0)))
-#' AIC(f_saem_dfop)
+#' f_saem_dfop_red <- update(f_saem_dfop, no_random_effect = "g_qlogis")
+#' anova(f_saem_dfop, f_saem_dfop_red, test = TRUE)
 #'
+#' anova(f_saem_sfo, f_saem_fomc, f_saem_dfop)
 #' # The returned saem.mmkin object contains an SaemixObject, therefore we can use
 #' # functions from saemix
 #' library(saemix)
@@ -79,7 +82,7 @@ utils::globalVariables(c("predicted", "std"))
 #'
 #' f_mmkin_parent_tc <- update(f_mmkin_parent, error_model = "tc")
 #' f_saem_fomc_tc <- saem(f_mmkin_parent_tc["FOMC", ])
-#' compare.saemix(f_saem_fomc$so, f_saem_fomc_tc$so)
+#' anova(f_saem_fomc, f_saem_fomc_tc, test = TRUE)
 #'
 #' sfo_sfo <- mkinmod(parent = mkinsub("SFO", "A1"),
 #'   A1 = mkinsub("SFO"))
@@ -712,8 +715,14 @@ saemix_data <- function(object, covariates = NULL, verbose = FALSE, ...) {
   return(res)
 }
 
+#' logLik method for saem.mmkin objects
+#'
+#' @param method Passed to [saemix::logLik.SaemixObject]
 #' @export
-logLik.saem.mmkin <- function(object, ...) return(logLik(object$so))
+logLik.saem.mmkin <- function(object, ..., method = c("lin", "is", "gq")) {
+  method <- match.arg(method)
+  return(logLik(object$so, method = method))
+}
 
 #' @export
 update.saem.mmkin <- function(object, ..., evaluate = TRUE) {
-- 
cgit v1.2.3