From 20b9c584e7c43ecbb708459e531c24a1a4751e17 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Sat, 9 Nov 2019 01:05:51 +0100 Subject: Add a lack-of-fit test - Switch an example dataset in the test setup to a dataset with replicates, adapt tests - Skip the test for lrtest with an update specification as it does not only fail when pkgdown generates static help pages, but also in testthat --- NAMESPACE | 5 + NEWS.md | 2 + R/loftest.R | 112 +++++++++++++ R/logLik.mkinfit.R | 11 +- _pkgdown.yml | 1 + docs/news/index.html | 1 + docs/reference/index.html | 6 + docs/reference/loftest-1.png | Bin 0 -> 27354 bytes docs/reference/loftest-2.png | Bin 0 -> 27721 bytes docs/reference/loftest-3.png | Bin 0 -> 65409 bytes docs/reference/loftest-4.png | Bin 0 -> 64457 bytes docs/reference/loftest-5.png | Bin 0 -> 63057 bytes docs/reference/loftest.html | 343 +++++++++++++++++++++++++++++++++++++++ docs/sitemap.xml | 3 + man/loftest.Rd | 81 +++++++++ test.log | 26 +-- tests/testthat/FOCUS_2006_D.csf | 2 +- tests/testthat/setup_script.R | 10 +- tests/testthat/test_confidence.R | 7 +- tests/testthat/test_tests.R | 22 ++- 20 files changed, 605 insertions(+), 27 deletions(-) create mode 100644 R/loftest.R create mode 100644 docs/reference/loftest-1.png create mode 100644 docs/reference/loftest-2.png create mode 100644 docs/reference/loftest-3.png create mode 100644 docs/reference/loftest-4.png create mode 100644 docs/reference/loftest-5.png create mode 100644 docs/reference/loftest.html create mode 100644 man/loftest.Rd diff --git a/NAMESPACE b/NAMESPACE index 8ea4c684..f428a612 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ S3method("[",mmkin) S3method(AIC,mmkin) S3method(BIC,mmkin) S3method(confint,mkinfit) +S3method(loftest,mkinfit) S3method(logLik,mkinfit) S3method(lrtest,mkinfit) S3method(mkinpredict,mkinfit) @@ -32,6 +33,7 @@ export(backtransform_odeparms) export(endpoints) export(ilr) export(invilr) +export(loftest) export(logistic.solution) export(lrtest) export(max_twa_dfop) @@ -73,8 +75,11 @@ importFrom(parallel,parLapply) importFrom(stats,AIC) importFrom(stats,BIC) importFrom(stats,aggregate) +importFrom(stats,coef) importFrom(stats,cov2cor) importFrom(stats,dist) +importFrom(stats,dnorm) +importFrom(stats,lm) importFrom(stats,logLik) importFrom(stats,nlminb) importFrom(stats,nobs) diff --git a/NEWS.md b/NEWS.md index 395cd623..965105f4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # mkin 0.9.49.8 (unreleased) +- 'loftest': Add a lack-of-fit test + - 'plot_res', 'plot_sep' and 'mkinerrplot': Add the possibility to show standardized residuals and make it the default for fits with error models other than 'const' - 'lrtest.mkinfit': Improve naming of the compared fits in the case of fixed parameters diff --git a/R/loftest.R b/R/loftest.R new file mode 100644 index 00000000..29721e23 --- /dev/null +++ b/R/loftest.R @@ -0,0 +1,112 @@ +#' Lack-of-fit test for models fitted to data with replicates +#' +#' This is a generic function with a method currently only defined for mkinfit +#' objects. It fits an anova model to the data contained in the object and +#' compares the likelihoods using the likelihood ratio test +#' \code{\link[lmtest]{lrtest.default}} from the lmtest package. +#' +#' The anova model is interpreted as the simplest form of an mkinfit model, +#' assuming only a constant variance about the means, but not enforcing any +#' structure of the means, so we have one model parameter for every mean +#' of replicate samples. +#' +#' @param object A model object with a defined loftest method +#' @param \dots Not used +#' @export +loftest <- function(object, ...) { + UseMethod("loftest") +} + +#' @rdname loftest +#' @importFrom stats logLik lm dnorm coef +#' @seealso lrtest +#' @examples +#' \dontrun{ +#' test_data <- subset(synthetic_data_for_UBA_2014[[12]]$data, name == "parent") +#' sfo_fit <- mkinfit("SFO", test_data, quiet = TRUE) +#' plot_res(sfo_fit) # We see a clear pattern in the residuals +#' loftest(sfo_fit) # We have a clear lack of fit +#' # +#' # We try a different model (the one that was used to generate the data) +#' dfop_fit <- mkinfit("DFOP", test_data, quiet = TRUE) +#' plot_res(dfop_fit) # We don't see systematic deviations, but heteroscedastic residuals +#' # therefore we should consider adapting the error model, although we have +#' loftest(dfop_fit) # no lack of fit +#' # +#' # This is the anova model used internally for the comparison +#' test_data_anova <- test_data +#' test_data_anova$time <- as.factor(test_data_anova$time) +#' anova_fit <- lm(value ~ time, data = test_data_anova) +#' summary(anova_fit) +#' logLik(anova_fit) # We get the same likelihood and degrees of freedom +#' # +#' test_data_2 <- synthetic_data_for_UBA_2014[[12]]$data +#' m_synth_SFO_lin <- mkinmod(parent = list(type = "SFO", to = "M1"), +#' M1 = list(type = "SFO", to = "M2"), +#' M2 = list(type = "SFO"), use_of_ff = "max") +#' sfo_lin_fit <- mkinfit(m_synth_SFO_lin, test_data_2, quiet = TRUE) +#' plot_res(sfo_lin_fit) # not a good model, we try parallel formation +#' loftest(sfo_lin_fit) +#' # +#' m_synth_SFO_par <- mkinmod(parent = list(type = "SFO", to = c("M1", "M2")), +#' M1 = list(type = "SFO"), +#' M2 = list(type = "SFO"), use_of_ff = "max") +#' sfo_par_fit <- mkinfit(m_synth_SFO_par, test_data_2, quiet = TRUE) +#' plot_res(sfo_par_fit) # much better for metabolites +#' loftest(sfo_par_fit) +#' # +#' m_synth_DFOP_par <- mkinmod(parent = list(type = "DFOP", to = c("M1", "M2")), +#' M1 = list(type = "SFO"), +#' M2 = list(type = "SFO"), use_of_ff = "max") +#' dfop_par_fit <- mkinfit(m_synth_DFOP_par, test_data_2, quiet = TRUE) +#' plot_res(dfop_par_fit) # No visual lack of fit +#' loftest(dfop_par_fit) # no lack of fit found by the test +#' # +#' # The anova model used for comparison in the case of transformation products +#' test_data_anova_2 <- dfop_par_fit$data +#' test_data_anova_2$variable <- as.factor(test_data_anova_2$variable) +#' test_data_anova_2$time <- as.factor(test_data_anova_2$time) +#' anova_fit_2 <- lm(observed ~ time:variable - 1, data = test_data_anova_2) +#' summary(anova_fit_2) +#' } +#' @export +loftest.mkinfit <- function(object, ...) { + + name_function <- function(x) { + object_name <- paste(x$mkinmod$name, "with error model", x$err_mod) + if (length(x$bparms.fixed) > 0) { + object_name <- paste(object_name, + "and fixed parameter(s)", + paste(names(x$bparms.fixed), collapse = ", ")) + } + return(object_name) + } + + # Check if we have replicates in the data + if (max(aggregate(object$data$observed, + by = list(object$data$variable, object$data$time), length)$x) == 1) { + stop("Not defined for fits to data without replicates") + } + + data_anova <- object$data + data_anova$time <- as.factor(data_anova$time) + data_anova$variable <- as.factor(data_anova$variable) + if (nlevels(data_anova$variable) == 1) { + object_2 <- lm(observed ~ time - 1, data = data_anova) + } else { + object_2 <- lm(observed ~ variable:time - 1, + data = data_anova) + } + + object_2$mkinmod <- list(name = "ANOVA") + object_2$err_mod <- "const" + sigma_mle <- sqrt(sum(residuals(object_2)^2)/nobs(object_2)) + object_2$logLik <- sum(dnorm(x = object_2$residuals, + mean = 0, sd = sigma_mle, log = TRUE)) + object_2$data <- object$data # to make the nobs.mkinfit method work + object_2$bparms.optim <- coef(object_2) + object_2$errparms <- 1 # We have estimated one error model parameter + class(object_2) <- "mkinfit" + + lmtest::lrtest.default(object_2, object, name = name_function) +} diff --git a/R/logLik.mkinfit.R b/R/logLik.mkinfit.R index cadc0d0a..1c025893 100644 --- a/R/logLik.mkinfit.R +++ b/R/logLik.mkinfit.R @@ -1,15 +1,15 @@ #' Calculated the log-likelihood of a fitted mkinfit object -#' +#' #' This function returns the product of the likelihood densities of each #' observed value, as calculated as part of the fitting procedure using #' \code{\link{dnorm}}, i.e. assuming normal distribution, and with the means #' predicted by the degradation model, and the standard deviations predicted by #' the error model. -#' +#' #' The total number of estimated parameters returned with the value of the #' likelihood is calculated as the sum of fitted degradation model parameters #' and the fitted error model parameters. -#' +#' #' @param object An object of class \code{\link{mkinfit}}. #' @param \dots For compatibility with the generic method #' @return An object of class \code{\link{logLik}} with the number of estimated @@ -19,7 +19,7 @@ #' @seealso Compare the AIC of columns of \code{\link{mmkin}} objects using #' \code{\link{AIC.mmkin}}. #' @examples -#' +#' #' \dontrun{ #' sfo_sfo <- mkinmod( #' parent = mkinsub("SFO", to = "m1"), @@ -31,11 +31,12 @@ #' f_tc <- mkinfit(sfo_sfo, d_t, error_model = "tc", quiet = TRUE) #' AIC(f_nw, f_obs, f_tc) #' } -#' +#' #' @export logLik.mkinfit <- function(object, ...) { val <- object$logLik # Number of estimated parameters attr(val, "df") <- length(object$bparms.optim) + length(object$errparms) + class(val) <- "logLik" return(val) } diff --git a/_pkgdown.yml b/_pkgdown.yml index c222a817..c298256f 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -22,6 +22,7 @@ reference: - confint.mkinfit - update.mkinfit - lrtest.mkinfit + - loftest - mkinerrmin - endpoints - CAKE_export diff --git a/docs/news/index.html b/docs/news/index.html index 48ba25e5..9aa2e18b 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -134,6 +134,7 @@ mkin 0.9.49.8 (unreleased) Unreleased