From 20b9c584e7c43ecbb708459e531c24a1a4751e17 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Sat, 9 Nov 2019 01:05:51 +0100 Subject: Add a lack-of-fit test - Switch an example dataset in the test setup to a dataset with replicates, adapt tests - Skip the test for lrtest with an update specification as it does not only fail when pkgdown generates static help pages, but also in testthat --- R/loftest.R | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++ R/logLik.mkinfit.R | 11 +++--- 2 files changed, 118 insertions(+), 5 deletions(-) create mode 100644 R/loftest.R (limited to 'R') diff --git a/R/loftest.R b/R/loftest.R new file mode 100644 index 00000000..29721e23 --- /dev/null +++ b/R/loftest.R @@ -0,0 +1,112 @@ +#' Lack-of-fit test for models fitted to data with replicates +#' +#' This is a generic function with a method currently only defined for mkinfit +#' objects. It fits an anova model to the data contained in the object and +#' compares the likelihoods using the likelihood ratio test +#' \code{\link[lmtest]{lrtest.default}} from the lmtest package. +#' +#' The anova model is interpreted as the simplest form of an mkinfit model, +#' assuming only a constant variance about the means, but not enforcing any +#' structure of the means, so we have one model parameter for every mean +#' of replicate samples. +#' +#' @param object A model object with a defined loftest method +#' @param \dots Not used +#' @export +loftest <- function(object, ...) { + UseMethod("loftest") +} + +#' @rdname loftest +#' @importFrom stats logLik lm dnorm coef +#' @seealso lrtest +#' @examples +#' \dontrun{ +#' test_data <- subset(synthetic_data_for_UBA_2014[[12]]$data, name == "parent") +#' sfo_fit <- mkinfit("SFO", test_data, quiet = TRUE) +#' plot_res(sfo_fit) # We see a clear pattern in the residuals +#' loftest(sfo_fit) # We have a clear lack of fit +#' # +#' # We try a different model (the one that was used to generate the data) +#' dfop_fit <- mkinfit("DFOP", test_data, quiet = TRUE) +#' plot_res(dfop_fit) # We don't see systematic deviations, but heteroscedastic residuals +#' # therefore we should consider adapting the error model, although we have +#' loftest(dfop_fit) # no lack of fit +#' # +#' # This is the anova model used internally for the comparison +#' test_data_anova <- test_data +#' test_data_anova$time <- as.factor(test_data_anova$time) +#' anova_fit <- lm(value ~ time, data = test_data_anova) +#' summary(anova_fit) +#' logLik(anova_fit) # We get the same likelihood and degrees of freedom +#' # +#' test_data_2 <- synthetic_data_for_UBA_2014[[12]]$data +#' m_synth_SFO_lin <- mkinmod(parent = list(type = "SFO", to = "M1"), +#' M1 = list(type = "SFO", to = "M2"), +#' M2 = list(type = "SFO"), use_of_ff = "max") +#' sfo_lin_fit <- mkinfit(m_synth_SFO_lin, test_data_2, quiet = TRUE) +#' plot_res(sfo_lin_fit) # not a good model, we try parallel formation +#' loftest(sfo_lin_fit) +#' # +#' m_synth_SFO_par <- mkinmod(parent = list(type = "SFO", to = c("M1", "M2")), +#' M1 = list(type = "SFO"), +#' M2 = list(type = "SFO"), use_of_ff = "max") +#' sfo_par_fit <- mkinfit(m_synth_SFO_par, test_data_2, quiet = TRUE) +#' plot_res(sfo_par_fit) # much better for metabolites +#' loftest(sfo_par_fit) +#' # +#' m_synth_DFOP_par <- mkinmod(parent = list(type = "DFOP", to = c("M1", "M2")), +#' M1 = list(type = "SFO"), +#' M2 = list(type = "SFO"), use_of_ff = "max") +#' dfop_par_fit <- mkinfit(m_synth_DFOP_par, test_data_2, quiet = TRUE) +#' plot_res(dfop_par_fit) # No visual lack of fit +#' loftest(dfop_par_fit) # no lack of fit found by the test +#' # +#' # The anova model used for comparison in the case of transformation products +#' test_data_anova_2 <- dfop_par_fit$data +#' test_data_anova_2$variable <- as.factor(test_data_anova_2$variable) +#' test_data_anova_2$time <- as.factor(test_data_anova_2$time) +#' anova_fit_2 <- lm(observed ~ time:variable - 1, data = test_data_anova_2) +#' summary(anova_fit_2) +#' } +#' @export +loftest.mkinfit <- function(object, ...) { + + name_function <- function(x) { + object_name <- paste(x$mkinmod$name, "with error model", x$err_mod) + if (length(x$bparms.fixed) > 0) { + object_name <- paste(object_name, + "and fixed parameter(s)", + paste(names(x$bparms.fixed), collapse = ", ")) + } + return(object_name) + } + + # Check if we have replicates in the data + if (max(aggregate(object$data$observed, + by = list(object$data$variable, object$data$time), length)$x) == 1) { + stop("Not defined for fits to data without replicates") + } + + data_anova <- object$data + data_anova$time <- as.factor(data_anova$time) + data_anova$variable <- as.factor(data_anova$variable) + if (nlevels(data_anova$variable) == 1) { + object_2 <- lm(observed ~ time - 1, data = data_anova) + } else { + object_2 <- lm(observed ~ variable:time - 1, + data = data_anova) + } + + object_2$mkinmod <- list(name = "ANOVA") + object_2$err_mod <- "const" + sigma_mle <- sqrt(sum(residuals(object_2)^2)/nobs(object_2)) + object_2$logLik <- sum(dnorm(x = object_2$residuals, + mean = 0, sd = sigma_mle, log = TRUE)) + object_2$data <- object$data # to make the nobs.mkinfit method work + object_2$bparms.optim <- coef(object_2) + object_2$errparms <- 1 # We have estimated one error model parameter + class(object_2) <- "mkinfit" + + lmtest::lrtest.default(object_2, object, name = name_function) +} diff --git a/R/logLik.mkinfit.R b/R/logLik.mkinfit.R index cadc0d0a..1c025893 100644 --- a/R/logLik.mkinfit.R +++ b/R/logLik.mkinfit.R @@ -1,15 +1,15 @@ #' Calculated the log-likelihood of a fitted mkinfit object -#' +#' #' This function returns the product of the likelihood densities of each #' observed value, as calculated as part of the fitting procedure using #' \code{\link{dnorm}}, i.e. assuming normal distribution, and with the means #' predicted by the degradation model, and the standard deviations predicted by #' the error model. -#' +#' #' The total number of estimated parameters returned with the value of the #' likelihood is calculated as the sum of fitted degradation model parameters #' and the fitted error model parameters. -#' +#' #' @param object An object of class \code{\link{mkinfit}}. #' @param \dots For compatibility with the generic method #' @return An object of class \code{\link{logLik}} with the number of estimated @@ -19,7 +19,7 @@ #' @seealso Compare the AIC of columns of \code{\link{mmkin}} objects using #' \code{\link{AIC.mmkin}}. #' @examples -#' +#' #' \dontrun{ #' sfo_sfo <- mkinmod( #' parent = mkinsub("SFO", to = "m1"), @@ -31,11 +31,12 @@ #' f_tc <- mkinfit(sfo_sfo, d_t, error_model = "tc", quiet = TRUE) #' AIC(f_nw, f_obs, f_tc) #' } -#' +#' #' @export logLik.mkinfit <- function(object, ...) { val <- object$logLik # Number of estimated parameters attr(val, "df") <- length(object$bparms.optim) + length(object$errparms) + class(val) <- "logLik" return(val) } -- cgit v1.2.1 From 6064a205d42220062506b11d6334b4f8e3217041 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Tue, 12 Nov 2019 15:16:28 +0100 Subject: lrtest method for mmkin columns with two fits --- R/lrtest.mkinfit.R | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'R') diff --git a/R/lrtest.mkinfit.R b/R/lrtest.mkinfit.R index a5689830..380cf3a5 100644 --- a/R/lrtest.mkinfit.R +++ b/R/lrtest.mkinfit.R @@ -18,6 +18,8 @@ lmtest::lrtest #' parameters (alternative hypothesis) is listed first, then the model with the #' lower number of fitted parameters (null hypothesis). #' +#' The method for mmkin objects only works for column objects with two members. +#' #' @importFrom stats logLik update #' @param object An \code{\link{mkinfit}} object #' @param object_2 Optionally, another mkinfit object fitted to the same data. @@ -68,3 +70,10 @@ lrtest.mkinfit <- function(object, object_2 = NULL, ...) { lmtest::lrtest.default(object_2, object, name = name_function) } } + +#' @rdname lrtest.mkinfit +#' @export +lrtest.mmkin <- function(object, ...) { + if (nrow(object) != 2 | ncol(object) > 1) stop("Only works for a column containing two mkinfit objects") + lrtest(object[[1, 1]], object[[2, 1]]) +} -- cgit v1.2.1 From f6385b071cee2c261da28bf98fd1599da1a30ddb Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 13 Nov 2019 11:05:39 +0100 Subject: Likelihood ratio test for mmkin columns with two fits --- R/lrtest.mkinfit.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'R') diff --git a/R/lrtest.mkinfit.R b/R/lrtest.mkinfit.R index 380cf3a5..0054ea88 100644 --- a/R/lrtest.mkinfit.R +++ b/R/lrtest.mkinfit.R @@ -18,10 +18,9 @@ lmtest::lrtest #' parameters (alternative hypothesis) is listed first, then the model with the #' lower number of fitted parameters (null hypothesis). #' -#' The method for mmkin objects only works for column objects with two members. -#' #' @importFrom stats logLik update -#' @param object An \code{\link{mkinfit}} object +#' @param object An \code{\link{mkinfit}} object, or an \code{\link{mmkin}} column +#' object containing two fits to the same data. #' @param object_2 Optionally, another mkinfit object fitted to the same data. #' @param \dots Argument to \code{\link{mkinfit}}, passed to #' \code{\link{update.mkinfit}} for creating the alternative fitted object. -- cgit v1.2.1 From c3700bec3a704660d3ade7a54c56b7084beb02b4 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 13 Nov 2019 21:15:35 +0100 Subject: Calculate Akaike weights --- R/aw.R | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 R/aw.R (limited to 'R') diff --git a/R/aw.R b/R/aw.R new file mode 100644 index 00000000..24078f38 --- /dev/null +++ b/R/aw.R @@ -0,0 +1,60 @@ +#' Calculate Akaike weights for model averaging +#' +#' Akaike weights are calculated based on the relative +#' expected Kullback-Leibler information as specified +#' by Burnham and Anderson (2004). +#' +#' @param object An mmkin column object, containing two or more +#' \code{\link{mkinfit}} models that have been fitted to the same data, +#' or an mkinfit object. In the latter case, further mkinfit +#' objects fitted to the same data should be specified +#' as dots arguments. +#' @param \dots Not used in the method for mmkin column objects, +#' further mkinfit objects in the method for mkinfit objects. +#' @references Burnham KP and Anderson DR (2004) Multimodel +#' Inference: Understanding AIC and BIC in Model Selection +#' Sociological Methods & Research 33(2) 261-304 +#' @examples +#' \dontrun{ +#' f_sfo <- mkinfit("SFO", FOCUS_2006_D, quiet = TRUE) +#' f_dfop <- mkinfit("DFOP", FOCUS_2006_D, quiet = TRUE) +#' aw_sfo_dfop <- aw(f_sfo, f_dfop) +#' sum(aw_sfo_dfop) +#' aw_sfo_dfop # SFO gets more weight as it has less parameters and a similar fit +#' f <- mmkin(c("SFO", "FOMC", "DFOP"), list("FOCUS D" = FOCUS_2006_D), cores = 1, quiet = TRUE) +#' aw(f) +#' sum(aw(f)) +#' aw(f[c("SFO", "DFOP")]) +#' } +#' @export +aw <- function(object, ...) UseMethod("aw") + +#' @export +#' @rdname aw +aw.mkinfit <- function(object, ...) { + oo <- list(...) + data_object <- object$data[c("time", "variable", "observed")] + for (i in seq_along(oo)) { + if (!inherits(oo[[i]], "mkinfit")) stop("Please supply only mkinfit objects") + data_other_object <- oo[[i]]$data[c("time", "variable", "observed")] + if (!identical(data_object, data_other_object)) { + stop("It seems that the mkinfit objects have not all been fitted to the same data") + } + } + all_objects <- list(object, ...) + AIC_all <- sapply(all_objects, AIC) + delta_i <- AIC_all - min(AIC_all) + denom <- sum(exp(-delta_i/2)) + w_i <- exp(-delta_i/2) / denom + return(w_i) +} + +#' @export +#' @rdname aw +aw.mmkin <- function(object, ...) { + if (ncol(object) > 1) stop("Please supply an mmkin column object") + do.call(aw, object) +} + + + -- cgit v1.2.1 From 51535c943d2846bc73ad69a11909aac606996b7d Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 13 Nov 2019 22:16:43 +0100 Subject: Improve formatting using markdown --- R/aw.R | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'R') diff --git a/R/aw.R b/R/aw.R index 24078f38..f46b20ec 100644 --- a/R/aw.R +++ b/R/aw.R @@ -4,16 +4,17 @@ #' expected Kullback-Leibler information as specified #' by Burnham and Anderson (2004). #' -#' @param object An mmkin column object, containing two or more -#' \code{\link{mkinfit}} models that have been fitted to the same data, +#' @param object An [mmkin] column object, containing two or more +#' [mkinfit] models that have been fitted to the same data, #' or an mkinfit object. In the latter case, further mkinfit #' objects fitted to the same data should be specified #' as dots arguments. -#' @param \dots Not used in the method for mmkin column objects, -#' further mkinfit objects in the method for mkinfit objects. +#' @param \dots Not used in the method for [mmkin] column objects, +#' further [mkinfit] objects in the method for mkinfit objects. #' @references Burnham KP and Anderson DR (2004) Multimodel -#' Inference: Understanding AIC and BIC in Model Selection -#' Sociological Methods & Research 33(2) 261-304 +#' Inference: Understanding AIC and BIC in Model Selection. +#' *Sociological Methods & Research* **33**(2) 261-304 +#' @md #' @examples #' \dontrun{ #' f_sfo <- mkinfit("SFO", FOCUS_2006_D, quiet = TRUE) @@ -55,6 +56,3 @@ aw.mmkin <- function(object, ...) { if (ncol(object) > 1) stop("Please supply an mmkin column object") do.call(aw, object) } - - - -- cgit v1.2.1 From 7ea467e0e0ba5bf51540b26e197869a58ed1a092 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Mon, 9 Dec 2019 15:21:11 +0100 Subject: Consistently use "two-component error model" instead of "two component error model" --- R/sigma_twocomp.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'R') diff --git a/R/sigma_twocomp.R b/R/sigma_twocomp.R index c9a15aa8..1e012d15 100644 --- a/R/sigma_twocomp.R +++ b/R/sigma_twocomp.R @@ -1,4 +1,4 @@ -#' Two component error model +#' Two-component error model #' #' Function describing the standard deviation of the measurement error in #' dependence of the measured value \eqn{y}: -- cgit v1.2.1 From 1868c1c6b98afa4c8a11b7c065d717bfb4ec1a8e Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Mon, 16 Dec 2019 02:51:19 +0100 Subject: Argument ymax for plot.mmkin --- R/plot.mmkin.R | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'R') diff --git a/R/plot.mmkin.R b/R/plot.mmkin.R index eefafe12..182e74ca 100644 --- a/R/plot.mmkin.R +++ b/R/plot.mmkin.R @@ -1,13 +1,13 @@ #' Plot model fits (observed and fitted) and the residuals for a row or column #' of an mmkin object -#' +#' #' When x is a row selected from an mmkin object (\code{\link{[.mmkin}}), the #' same model fitted for at least one dataset is shown. When it is a column, #' the fit of at least one model to the same dataset is shown. -#' +#' #' If the current plot device is a \code{\link[tikzDevice]{tikz}} device, then #' latex is being used for the formatting of the chi2 error level. -#' +#' #' @param x An object of class \code{\link{mmkin}}, with either one row or one #' column. #' @param main The main title placed on the outer margin of the plot. @@ -24,12 +24,13 @@ #' @param cex Passed to the plot functions and \code{\link{mtext}}. #' @param rel.height.middle The relative height of the middle plot, if more #' than two rows of plots are shown. +#' @param ymax Maximum y axis value for \code{\link{plot.mkinfit}}. #' @param \dots Further arguments passed to \code{\link{plot.mkinfit}} and #' \code{\link{mkinresplot}}. #' @return The function is called for its side effect. #' @author Johannes Ranke #' @examples -#' +#' #' \dontrun{ #' # Only use one core not to offend CRAN checks #' fits <- mmkin(c("FOMC", "HS"), @@ -37,22 +38,23 @@ #' cores = 1, quiet = TRUE, error_model = "tc") #' plot(fits[, "FOCUS C"]) #' plot(fits["FOMC", ]) -#' +#' #' # We can also plot a single fit, if we like the way plot.mmkin works, but then the plot #' # height should be smaller than the plot width (this is not possible for the html pages #' # generated by pkgdown, as far as I know). #' plot(fits["FOMC", "FOCUS C"]) # same as plot(fits[1, 2]) -#' +#' #' # Show the error models #' plot(fits["FOMC", ], resplot = "errmod") #' } -#' +#' #' @export -plot.mmkin <- function(x, main = "auto", legends = 1, +plot.mmkin <- function(x, main = "auto", legends = 1, resplot = c("time", "errmod"), show_errmin = TRUE, errmin_var = "All data", errmin_digits = 3, - cex = 0.7, rel.height.middle = 0.9, ...) { + cex = 0.7, rel.height.middle = 0.9, + ymax = "auto", ...) { n.m <- nrow(x) n.d <- ncol(x) @@ -107,7 +109,11 @@ plot.mmkin <- function(x, main = "auto", legends = 1, } fit <- x[[i.fit]] - plot(fit, legend = legends == i.fit, ...) + if (ymax == "auto") { + plot(fit, legend = legends == i.fit, ...) + } else { + plot(fit, legend = legends == i.fit, ylim = c(0, ymax), ...) + } title(main, outer = TRUE, line = -2) -- cgit v1.2.1 From ccb9c63a93d202b5adea425ae2938c8705e866c9 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Fri, 14 Feb 2020 17:47:42 +0100 Subject: Delete trailing whitespace --- R/mmkin.R | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'R') diff --git a/R/mmkin.R b/R/mmkin.R index 4f3f28a9..39eb5740 100644 --- a/R/mmkin.R +++ b/R/mmkin.R @@ -1,10 +1,10 @@ #' Fit one or more kinetic models with one or more state variables to one or #' more datasets -#' +#' #' This function calls \code{\link{mkinfit}} on all combinations of models and #' datasets specified in its first two arguments. -#' -#' @param models Either a character vector of shorthand names like +#' +#' @param models Either a character vector of shorthand names like #' \code{c("SFO", "FOMC", "DFOP", "HS", "SFORB")}, or an optionally named #' list of \code{\link{mkinmod}} objects. #' @param datasets An optionally named list of datasets suitable as observed @@ -24,28 +24,28 @@ #' plotting. #' @keywords optimize #' @examples -#' +#' #' \dontrun{ #' m_synth_SFO_lin <- mkinmod(parent = mkinsub("SFO", "M1"), #' M1 = mkinsub("SFO", "M2"), #' M2 = mkinsub("SFO"), use_of_ff = "max") -#' +#' #' m_synth_FOMC_lin <- mkinmod(parent = mkinsub("FOMC", "M1"), #' M1 = mkinsub("SFO", "M2"), #' M2 = mkinsub("SFO"), use_of_ff = "max") -#' +#' #' models <- list(SFO_lin = m_synth_SFO_lin, FOMC_lin = m_synth_FOMC_lin) #' datasets <- lapply(synthetic_data_for_UBA_2014[1:3], function(x) x$data) #' names(datasets) <- paste("Dataset", 1:3) -#' +#' #' time_default <- system.time(fits.0 <- mmkin(models, datasets, quiet = TRUE)) #' time_1 <- system.time(fits.4 <- mmkin(models, datasets, cores = 1, quiet = TRUE)) -#' +#' #' time_default #' time_1 -#' +#' #' endpoints(fits.0[["SFO_lin", 2]]) -#' +#' #' # plot.mkinfit handles rows or columns of mmkin result objects #' plot(fits.0[1, ]) #' plot(fits.0[1, ], obs_var = c("M1", "M2")) @@ -58,7 +58,7 @@ #' # allow to plot the observed variables separately #' plot(fits.0[1, 1]) #' } -#' +#' #' @export mmkin mmkin <- function(models = c("SFO", "FOMC", "DFOP"), datasets, cores = round(detectCores()/2), cluster = NULL, ...) -- cgit v1.2.1 From 68eed166cbe10a5ee79f5b1139261dea98234b22 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 18 Mar 2020 16:16:18 +0100 Subject: Clarify comment in example code --- R/lrtest.mkinfit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'R') diff --git a/R/lrtest.mkinfit.R b/R/lrtest.mkinfit.R index 0054ea88..c3f4d38e 100644 --- a/R/lrtest.mkinfit.R +++ b/R/lrtest.mkinfit.R @@ -37,7 +37,7 @@ lmtest::lrtest #' #lrtest(dfop_fit, error_model = "tc") #' #lrtest(dfop_fit, fixed_parms = c(k2 = 0)) #' -#' # However, this equivalent syntax works for static help pages +#' # However, this equivalent syntax also works for static help pages #' lrtest(dfop_fit, update(dfop_fit, error_model = "tc")) #' lrtest(dfop_fit, update(dfop_fit, fixed_parms = c(k2 = 0))) #' } -- cgit v1.2.1