Merge branch 'master' into mxkin

author: Johannes Ranke <jranke@uni-bremen.de> 2020-03-27 11:47:48 +0100
committer: Johannes Ranke <jranke@uni-bremen.de> 2020-03-27 11:47:48 +0100
commit: 20ece4e0bcbeceb90a940e04a858f4ffb6d6b5e4 (patch)
tree: 7595dbb6e129332a6ad0c273ecd3fbd92643e0d5 /R
parent: 731dd9450f08868140f90af7a305133ec9342994 (diff)
parent: 68eed166cbe10a5ee79f5b1139261dea98234b22 (diff)
7 files changed, 214 insertions, 29 deletions
diff --git a/R/aw.R b/R/aw.R
new file mode 100644
index 00000000..f46b20ec
--- /dev/null
+++ b/R/aw.R
@@ -0,0 +1,58 @@
+#' Calculate Akaike weights for model averaging
+#'
+#' Akaike weights are calculated based on the relative
+#' expected Kullback-Leibler information as specified
+#' by Burnham and Anderson (2004).
+#'
+#' @param object An [mmkin] column object, containing two or more
+#'   [mkinfit] models that have been fitted to the same data,
+#'   or an mkinfit object. In the latter case, further mkinfit
+#'   objects fitted to the same data should be specified
+#'   as dots arguments.
+#' @param \dots Not used in the method for [mmkin] column objects,
+#'   further [mkinfit] objects in the method for mkinfit objects.
+#' @references Burnham KP and Anderson DR (2004) Multimodel
+#'   Inference: Understanding AIC and BIC in Model Selection.
+#'   *Sociological Methods & Research* **33**(2) 261-304
+#' @md
+#' @examples
+#' \dontrun{
+#' f_sfo <- mkinfit("SFO", FOCUS_2006_D, quiet = TRUE)
+#' f_dfop <- mkinfit("DFOP", FOCUS_2006_D, quiet = TRUE)
+#' aw_sfo_dfop <- aw(f_sfo, f_dfop)
+#' sum(aw_sfo_dfop)
+#' aw_sfo_dfop # SFO gets more weight as it has less parameters and a similar fit
+#' f <- mmkin(c("SFO", "FOMC", "DFOP"), list("FOCUS D" = FOCUS_2006_D), cores = 1, quiet = TRUE)
+#' aw(f)
+#' sum(aw(f))
+#' aw(f[c("SFO", "DFOP")])
+#' }
+#' @export
+aw <- function(object, ...) UseMethod("aw")
+
+#' @export
+#' @rdname aw
+aw.mkinfit <- function(object, ...) {
+  oo <- list(...)
+  data_object <- object$data[c("time", "variable", "observed")]
+  for (i in seq_along(oo)) {
+    if (!inherits(oo[[i]], "mkinfit")) stop("Please supply only mkinfit objects")
+    data_other_object <- oo[[i]]$data[c("time", "variable", "observed")]
+    if (!identical(data_object, data_other_object)) {
+      stop("It seems that the mkinfit objects have not all been fitted to the same data")
+    }
+  }
+  all_objects <- list(object, ...)
+  AIC_all <- sapply(all_objects, AIC)
+  delta_i <- AIC_all - min(AIC_all)
+  denom <- sum(exp(-delta_i/2))
+  w_i <- exp(-delta_i/2) / denom
+  return(w_i)
+}
+
+#' @export
+#' @rdname aw
+aw.mmkin <- function(object, ...) {
+  if (ncol(object) > 1) stop("Please supply an mmkin column object")
+  do.call(aw, object)
+}
diff --git a/R/loftest.R b/R/loftest.R
new file mode 100644
index 00000000..29721e23
--- /dev/null
+++ b/R/loftest.R
@@ -0,0 +1,112 @@
+#' Lack-of-fit test for models fitted to data with replicates
+#'
+#' This is a generic function with a method currently only defined for mkinfit
+#' objects. It fits an anova model to the data contained in the object and
+#' compares the likelihoods using the likelihood ratio test
+#' \code{\link[lmtest]{lrtest.default}} from the lmtest package.
+#'
+#' The anova model is interpreted as the simplest form of an mkinfit model,
+#' assuming only a constant variance about the means, but not enforcing any
+#' structure of the means, so we have one model parameter for every mean
+#' of replicate samples.
+#'
+#' @param object A model object with a defined loftest method
+#' @param \dots Not used
+#' @export
+loftest <- function(object, ...) {
+  UseMethod("loftest")
+}
+
+#' @rdname loftest
+#' @importFrom stats logLik lm dnorm coef
+#' @seealso lrtest
+#' @examples
+#' \dontrun{
+#' test_data <- subset(synthetic_data_for_UBA_2014[[12]]$data, name == "parent")
+#' sfo_fit <- mkinfit("SFO", test_data, quiet = TRUE)
+#' plot_res(sfo_fit) # We see a clear pattern in the residuals
+#' loftest(sfo_fit)  # We have a clear lack of fit
+#' #
+#' # We try a different model (the one that was used to generate the data)
+#' dfop_fit <- mkinfit("DFOP", test_data, quiet = TRUE)
+#' plot_res(dfop_fit) # We don't see systematic deviations, but heteroscedastic residuals
+#' # therefore we should consider adapting the error model, although we have
+#' loftest(dfop_fit) # no lack of fit
+#' #
+#' # This is the anova model used internally for the comparison
+#' test_data_anova <- test_data
+#' test_data_anova$time <- as.factor(test_data_anova$time)
+#' anova_fit <- lm(value ~ time, data = test_data_anova)
+#' summary(anova_fit)
+#' logLik(anova_fit) # We get the same likelihood and degrees of freedom
+#' #
+#' test_data_2 <- synthetic_data_for_UBA_2014[[12]]$data
+#' m_synth_SFO_lin <- mkinmod(parent = list(type = "SFO", to = "M1"),
+#'   M1 = list(type = "SFO", to = "M2"),
+#'   M2 = list(type = "SFO"), use_of_ff = "max")
+#' sfo_lin_fit <- mkinfit(m_synth_SFO_lin, test_data_2, quiet = TRUE)
+#' plot_res(sfo_lin_fit) # not a good model, we try parallel formation
+#' loftest(sfo_lin_fit)
+#' #
+#' m_synth_SFO_par <- mkinmod(parent = list(type = "SFO", to = c("M1", "M2")),
+#'   M1 = list(type = "SFO"),
+#'   M2 = list(type = "SFO"), use_of_ff = "max")
+#' sfo_par_fit <- mkinfit(m_synth_SFO_par, test_data_2, quiet = TRUE)
+#' plot_res(sfo_par_fit) # much better for metabolites
+#' loftest(sfo_par_fit)
+#' #
+#' m_synth_DFOP_par <- mkinmod(parent = list(type = "DFOP", to = c("M1", "M2")),
+#'   M1 = list(type = "SFO"),
+#'   M2 = list(type = "SFO"), use_of_ff = "max")
+#' dfop_par_fit <- mkinfit(m_synth_DFOP_par, test_data_2, quiet = TRUE)
+#' plot_res(dfop_par_fit) # No visual lack of fit
+#' loftest(dfop_par_fit)  # no lack of fit found by the test
+#' #
+#' # The anova model used for comparison in the case of transformation products
+#' test_data_anova_2 <- dfop_par_fit$data
+#' test_data_anova_2$variable <- as.factor(test_data_anova_2$variable)
+#' test_data_anova_2$time <- as.factor(test_data_anova_2$time)
+#' anova_fit_2 <- lm(observed ~ time:variable - 1, data = test_data_anova_2)
+#' summary(anova_fit_2)
+#' }
+#' @export
+loftest.mkinfit <- function(object, ...) {
+
+  name_function <- function(x) {
+    object_name <- paste(x$mkinmod$name, "with error model", x$err_mod)
+    if (length(x$bparms.fixed) > 0) {
+      object_name <- paste(object_name,
+        "and fixed parameter(s)",
+        paste(names(x$bparms.fixed), collapse = ", "))
+    }
+    return(object_name)
+  }
+
+  # Check if we have replicates in the data
+  if (max(aggregate(object$data$observed,
+    by = list(object$data$variable, object$data$time), length)$x) == 1) {
+    stop("Not defined for fits to data without replicates")
+  }
+
+  data_anova <- object$data
+  data_anova$time <- as.factor(data_anova$time)
+  data_anova$variable <- as.factor(data_anova$variable)
+  if (nlevels(data_anova$variable) == 1) {
+    object_2 <- lm(observed ~ time - 1, data = data_anova)
+  } else {
+    object_2 <- lm(observed ~ variable:time - 1,
+      data = data_anova)
+  }
+
+  object_2$mkinmod <- list(name = "ANOVA")
+  object_2$err_mod <- "const"
+  sigma_mle <- sqrt(sum(residuals(object_2)^2)/nobs(object_2))
+  object_2$logLik <- sum(dnorm(x = object_2$residuals,
+      mean = 0, sd = sigma_mle, log = TRUE))
+  object_2$data <- object$data # to make the nobs.mkinfit method work
+  object_2$bparms.optim <- coef(object_2)
+  object_2$errparms <- 1 # We have estimated one error model parameter
+  class(object_2) <- "mkinfit"
+
+  lmtest::lrtest.default(object_2, object, name = name_function)
+}
diff --git a/R/logLik.mkinfit.R b/R/logLik.mkinfit.R
index cadc0d0a..1c025893 100644
--- a/R/logLik.mkinfit.R
+++ b/R/logLik.mkinfit.R
@@ -1,15 +1,15 @@
 #' Calculated the log-likelihood of a fitted mkinfit object
-#' 
+#'
 #' This function returns the product of the likelihood densities of each
 #' observed value, as calculated as part of the fitting procedure using
 #' \code{\link{dnorm}}, i.e. assuming normal distribution, and with the means
 #' predicted by the degradation model, and the standard deviations predicted by
 #' the error model.
-#' 
+#'
 #' The total number of estimated parameters returned with the value of the
 #' likelihood is calculated as the sum of fitted degradation model parameters
 #' and the fitted error model parameters.
-#' 
+#'
 #' @param object An object of class \code{\link{mkinfit}}.
 #' @param \dots For compatibility with the generic method
 #' @return An object of class \code{\link{logLik}} with the number of estimated
@@ -19,7 +19,7 @@
 #' @seealso Compare the AIC of columns of \code{\link{mmkin}} objects using
 #'   \code{\link{AIC.mmkin}}.
 #' @examples
-#' 
+#'
 #'   \dontrun{
 #'   sfo_sfo <- mkinmod(
 #'     parent = mkinsub("SFO", to = "m1"),
@@ -31,11 +31,12 @@
 #'   f_tc <- mkinfit(sfo_sfo, d_t, error_model = "tc", quiet = TRUE)
 #'   AIC(f_nw, f_obs, f_tc)
 #'   }
-#' 
+#'
 #' @export
 logLik.mkinfit <- function(object, ...) {
   val <- object$logLik
   # Number of estimated parameters
   attr(val, "df") <- length(object$bparms.optim) + length(object$errparms)
+  class(val) <- "logLik"
   return(val)
 }
diff --git a/R/lrtest.mkinfit.R b/R/lrtest.mkinfit.R
index a5689830..c3f4d38e 100644
--- a/R/lrtest.mkinfit.R
+++ b/R/lrtest.mkinfit.R
@@ -19,7 +19,8 @@ lmtest::lrtest
 #' lower number of fitted parameters (null hypothesis).
 #'
 #' @importFrom stats logLik update
-#' @param object An \code{\link{mkinfit}} object
+#' @param object An \code{\link{mkinfit}} object, or an \code{\link{mmkin}} column
+#'  object containing two fits to the same data.
 #' @param object_2 Optionally, another mkinfit object fitted to the same data.
 #' @param \dots Argument to \code{\link{mkinfit}}, passed to
 #'   \code{\link{update.mkinfit}} for creating the alternative fitted object.
@@ -36,7 +37,7 @@ lmtest::lrtest
 #' #lrtest(dfop_fit, error_model = "tc")
 #' #lrtest(dfop_fit, fixed_parms = c(k2 = 0))
 #'
-#' # However, this equivalent syntax works for static help pages
+#' # However, this equivalent syntax also works for static help pages
 #' lrtest(dfop_fit, update(dfop_fit, error_model = "tc"))
 #' lrtest(dfop_fit, update(dfop_fit, fixed_parms = c(k2 = 0)))
 #' }
@@ -68,3 +69,10 @@ lrtest.mkinfit <- function(object, object_2 = NULL, ...) {
     lmtest::lrtest.default(object_2, object, name = name_function)
   }
 }
+
+#' @rdname lrtest.mkinfit
+#' @export
+lrtest.mmkin <- function(object, ...) {
+  if (nrow(object) != 2 | ncol(object) > 1) stop("Only works for a column containing two mkinfit objects")
+  lrtest(object[[1, 1]], object[[2, 1]])
+}
diff --git a/R/mmkin.R b/R/mmkin.R
index 4f3f28a9..39eb5740 100644
--- a/R/mmkin.R
+++ b/R/mmkin.R
@@ -1,10 +1,10 @@
 #' Fit one or more kinetic models with one or more state variables to one or
 #' more datasets
-#' 
+#'
 #' This function calls \code{\link{mkinfit}} on all combinations of models and
 #' datasets specified in its first two arguments.
-#' 
-#' @param models Either a character vector of shorthand names like 
+#'
+#' @param models Either a character vector of shorthand names like
 #'   \code{c("SFO", "FOMC", "DFOP", "HS", "SFORB")}, or an optionally named
 #'   list of \code{\link{mkinmod}} objects.
 #' @param datasets An optionally named list of datasets suitable as observed
@@ -24,28 +24,28 @@
 #'   plotting.
 #' @keywords optimize
 #' @examples
-#' 
+#'
 #' \dontrun{
 #' m_synth_SFO_lin <- mkinmod(parent = mkinsub("SFO", "M1"),
 #'                            M1 = mkinsub("SFO", "M2"),
 #'                            M2 = mkinsub("SFO"), use_of_ff = "max")
-#' 
+#'
 #' m_synth_FOMC_lin <- mkinmod(parent = mkinsub("FOMC", "M1"),
 #'                             M1 = mkinsub("SFO", "M2"),
 #'                             M2 = mkinsub("SFO"), use_of_ff = "max")
-#' 
+#'
 #' models <- list(SFO_lin = m_synth_SFO_lin, FOMC_lin = m_synth_FOMC_lin)
 #' datasets <- lapply(synthetic_data_for_UBA_2014[1:3], function(x) x$data)
 #' names(datasets) <- paste("Dataset", 1:3)
-#' 
+#'
 #' time_default <- system.time(fits.0 <- mmkin(models, datasets, quiet = TRUE))
 #' time_1 <- system.time(fits.4 <- mmkin(models, datasets, cores = 1, quiet = TRUE))
-#' 
+#'
 #' time_default
 #' time_1
-#' 
+#'
 #' endpoints(fits.0[["SFO_lin", 2]])
-#' 
+#'
 #' # plot.mkinfit handles rows or columns of mmkin result objects
 #' plot(fits.0[1, ])
 #' plot(fits.0[1, ], obs_var = c("M1", "M2"))
@@ -58,7 +58,7 @@
 #' # allow to plot the observed variables separately
 #' plot(fits.0[1, 1])
 #' }
-#' 
+#'
 #' @export mmkin
 mmkin <- function(models = c("SFO", "FOMC", "DFOP"), datasets,
                   cores = round(detectCores()/2), cluster = NULL, ...)
diff --git a/R/plot.mmkin.R b/R/plot.mmkin.R
index eefafe12..182e74ca 100644
--- a/R/plot.mmkin.R
+++ b/R/plot.mmkin.R
@@ -1,13 +1,13 @@
 #' Plot model fits (observed and fitted) and the residuals for a row or column
 #' of an mmkin object
-#' 
+#'
 #' When x is a row selected from an mmkin object (\code{\link{[.mmkin}}), the
 #' same model fitted for at least one dataset is shown. When it is a column,
 #' the fit of at least one model to the same dataset is shown.
-#' 
+#'
 #' If the current plot device is a \code{\link[tikzDevice]{tikz}} device, then
 #' latex is being used for the formatting of the chi2 error level.
-#' 
+#'
 #' @param x An object of class \code{\link{mmkin}}, with either one row or one
 #'   column.
 #' @param main The main title placed on the outer margin of the plot.
@@ -24,12 +24,13 @@
 #' @param cex Passed to the plot functions and \code{\link{mtext}}.
 #' @param rel.height.middle The relative height of the middle plot, if more
 #'   than two rows of plots are shown.
+#' @param ymax Maximum y axis value for \code{\link{plot.mkinfit}}.
 #' @param \dots Further arguments passed to \code{\link{plot.mkinfit}} and
 #'   \code{\link{mkinresplot}}.
 #' @return The function is called for its side effect.
 #' @author Johannes Ranke
 #' @examples
-#' 
+#'
 #'   \dontrun{
 #'   # Only use one core not to offend CRAN checks
 #'   fits <- mmkin(c("FOMC", "HS"),
@@ -37,22 +38,23 @@
 #'                 cores = 1, quiet = TRUE, error_model = "tc")
 #'   plot(fits[, "FOCUS C"])
 #'   plot(fits["FOMC", ])
-#' 
+#'
 #'   # We can also plot a single fit, if we like the way plot.mmkin works, but then the plot
 #'   # height should be smaller than the plot width (this is not possible for the html pages
 #'   # generated by pkgdown, as far as I know).
 #'   plot(fits["FOMC", "FOCUS C"]) # same as plot(fits[1, 2])
-#' 
+#'
 #'   # Show the error models
 #'   plot(fits["FOMC", ], resplot = "errmod")
 #'   }
-#' 
+#'
 #' @export
-plot.mmkin <- function(x, main = "auto", legends = 1, 
+plot.mmkin <- function(x, main = "auto", legends = 1,
                        resplot = c("time", "errmod"),
                        show_errmin = TRUE,
                        errmin_var = "All data", errmin_digits = 3,
-                       cex = 0.7, rel.height.middle = 0.9, ...) {
+                       cex = 0.7, rel.height.middle = 0.9,
+                       ymax = "auto", ...) {
   n.m <- nrow(x)
   n.d <- ncol(x)
 
@@ -107,7 +109,11 @@ plot.mmkin <- function(x, main = "auto", legends = 1,
     }
 
     fit <- x[[i.fit]]
-    plot(fit, legend = legends == i.fit, ...)
+    if (ymax == "auto") {
+      plot(fit, legend = legends == i.fit, ...)
+    } else {
+      plot(fit, legend = legends == i.fit, ylim = c(0, ymax), ...)
+    }
 
     title(main, outer = TRUE, line = -2)
 
diff --git a/R/sigma_twocomp.R b/R/sigma_twocomp.R
index c9a15aa8..1e012d15 100644
--- a/R/sigma_twocomp.R
+++ b/R/sigma_twocomp.R
@@ -1,4 +1,4 @@
-#' Two component error model
+#' Two-component error model
 #' 
 #' Function describing the standard deviation of the measurement error in
 #' dependence of the measured value \eqn{y}:
author	Johannes Ranke <jranke@uni-bremen.de>	2020-03-27 11:47:48 +0100
committer	Johannes Ranke <jranke@uni-bremen.de>	2020-03-27 11:47:48 +0100
commit	20ece4e0bcbeceb90a940e04a858f4ffb6d6b5e4 (patch)
tree	7595dbb6e129332a6ad0c273ecd3fbd92643e0d5 /R
parent	731dd9450f08868140f90af7a305133ec9342994 (diff)
parent	68eed166cbe10a5ee79f5b1139261dea98234b22 (diff)