From 510436646b1bdd5b8cfab70be29334bd3cc9c828 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Fri, 29 May 2020 15:03:04 +0200 Subject: Warn if standardized residuals are unlikely normal This revealed a bug in the data returned in mkinfit$data in the case of the d_3 algorithm, which also affected the residual plot - the data from the direct fitting was not returned even if this was the better method. --- R/mkinfit.R | 100 ++++++++++++++++++++++++++++------------------------ R/summary.mkinfit.R | 5 +++ 2 files changed, 59 insertions(+), 46 deletions(-) (limited to 'R') diff --git a/R/mkinfit.R b/R/mkinfit.R index f9cb3304..d7b1b7f4 100644 --- a/R/mkinfit.R +++ b/R/mkinfit.R @@ -98,7 +98,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value")) #' @param quiet Suppress printing out the current value of the negative #' log-likelihood after each improvement? #' @param atol Absolute error tolerance, passed to [deSolve::ode()]. Default -#' is 1e-8, which is lower than the default in the [deSolve::lsoda()] +#' is 1e-8, which is lower than the default in the [deSolve::lsoda()] #' function which is used per default. #' @param rtol Absolute error tolerance, passed to [deSolve::ode()]. Default #' is 1e-10, much lower than in [deSolve::lsoda()]. @@ -119,7 +119,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value")) #' least squares fitting ("OLS") is selected. If the error model is "obs", or #' "tc", the "d_3" algorithm is selected. #' -#' The algorithm "d_3" will directly minimize the negative log-likelihood +#' The algorithm "d_3" will directly minimize the negative log-likelihood #' and independently also use the three step algorithm described below. #' The fit with the higher likelihood is returned. #' @@ -150,7 +150,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value")) #' @param trace_parms Should a trace of the parameter values be listed? #' @param \dots Further arguments that will be passed on to #' [deSolve::ode()]. -#' @importFrom stats nlminb aggregate dist +#' @importFrom stats nlminb aggregate dist shapiro.test #' @return A list with "mkinfit" in the class attribute. #' @note When using the "IORE" submodel for metabolites, fitting with #' "transform_rates = TRUE" (the default) often leads to failures of the @@ -168,7 +168,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value")) #' for measurement error in analytical chemistry. *Technometrics* 37(2), 176-184. #' #' Ranke J and Meinecke S (2019) Error Models for the Kinetic Evaluation of Chemical -#' Degradation Data. *Environments* 6(12) 124 +#' Degradation Data. *Environments* 6(12) 124 #' [doi:10.3390/environments6120124](https://doi.org/10.3390/environments6120124). #' @examples #' @@ -177,60 +177,62 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value")) #' summary(fit) #' #' # One parent compound, one metabolite, both single first order. +#' # We remove zero values from FOCUS dataset D in order to avoid warnings +#' FOCUS_D <- subset(FOCUS_2006_D, value != 0) #' # Use mkinsub for convenience in model formulation. Pathway to sink included per default. #' SFO_SFO <- mkinmod( #' parent = mkinsub("SFO", "m1"), #' m1 = mkinsub("SFO")) -#' # Fit the model to the FOCUS example dataset D using defaults -#' print(system.time(fit <- mkinfit(SFO_SFO, FOCUS_2006_D, -#' solution_type = "eigen", quiet = TRUE))) -#' parms(fit) -#' endpoints(fit) +#' +#' # Fit the model quietly to the FOCUS example dataset D using defaults +#' fit <- mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE) +#' # Since mkin 0.9.50.3, we get a warning about non-normality of residuals, +#' # so we try an alternative error model +#' fit.tc <- mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "tc") +#' # This avoids the warning, and the likelihood ratio test confirms it is preferable +#' lrtest(fit.tc, fit) +#' # We can also allow for different variances of parent and metabolite as error model +#' fit.obs <- mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "obs") +#' # This also avoids the warning about non-normality, but the two-component error model +#' # has significantly higher likelihood +#' lrtest(fit.obs, fit.tc) +#' parms(fit.tc) +#' endpoints(fit.tc) +#' +#' # We can show a quick (only one replication) benchmark for this case, as we +#' # have several alternative solution methods for the model. We skip +#' # uncompiled deSolve, as it is so slow. More benchmarks are found in the +#' # benchmark vignette #' \dontrun{ -#' # deSolve is slower when no C compiler (gcc) was available during model generation -#' print(system.time(fit.deSolve <- mkinfit(SFO_SFO, FOCUS_2006_D, -#' solution_type = "deSolve"))) -#' parms(fit.deSolve) -#' endpoints(fit.deSolve) +#' if(require(rbenchmark)) { +#' benchmark(replications = 1, order = "relative", columns = c("test", "relative", "elapsed"), +#' deSolve_compiled = mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "tc", +#' solution_type = "deSolve", use_compiled = TRUE), +#' eigen = mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "tc", +#' solution_type = "eigen"), +#' analytical = mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "tc", +#' solution_type = "analytical")) +#' } #' } #' -#' # Use stepwise fitting, using optimised parameters from parent only fit, FOMC +#' # Use stepwise fitting, using optimised parameters from parent only fit, FOMC-SFO #' \dontrun{ #' FOMC_SFO <- mkinmod( #' parent = mkinsub("FOMC", "m1"), #' m1 = mkinsub("SFO")) -#' # Fit the model to the FOCUS example dataset D using defaults -#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, quiet = TRUE) -#' # Use starting parameters from parent only FOMC fit -#' fit.FOMC = mkinfit("FOMC", FOCUS_2006_D, quiet = TRUE) -#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, quiet = TRUE, -#' parms.ini = fit.FOMC$bparms.ode) -#' -#' # Use stepwise fitting, using optimised parameters from parent only fit, SFORB -#' SFORB_SFO <- mkinmod( -#' parent = list(type = "SFORB", to = "m1", sink = TRUE), -#' m1 = list(type = "SFO")) -#' # Fit the model to the FOCUS example dataset D using defaults -#' fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, quiet = TRUE) -#' fit.SFORB_SFO.deSolve <- mkinfit(SFORB_SFO, FOCUS_2006_D, solution_type = "deSolve", -#' quiet = TRUE) -#' # Use starting parameters from parent only SFORB fit (not really needed in this case) -#' fit.SFORB = mkinfit("SFORB", FOCUS_2006_D, quiet = TRUE) -#' fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, parms.ini = fit.SFORB$bparms.ode, quiet = TRUE) -#' } -#' -#' \dontrun{ -#' # Weighted fits, including IRLS (error_model = "obs") -#' SFO_SFO.ff <- mkinmod(parent = mkinsub("SFO", "m1"), -#' m1 = mkinsub("SFO"), use_of_ff = "max") -#' f.noweight <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, quiet = TRUE) -#' summary(f.noweight) -#' f.obs <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, error_model = "obs", quiet = TRUE) -#' summary(f.obs) -#' f.tc <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, error_model = "tc", quiet = TRUE) -#' summary(f.tc) -#' } +#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_D, quiet = TRUE) +#' # Again, we get a warning and try a more sophisticated error model +#' fit.FOMC_SFO.tc <- mkinfit(FOMC_SFO, FOCUS_D, quiet = TRUE, error_model = "tc") +#' # This model has a higher likelihood, but not significantly so +#' lrtest(fit.tc, fit.FOMC_SFO.tc) +#' # Also, the missing standard error for log_beta and the t-tests for alpha +#' # and beta indicate overparameterisation +#' summary(fit.FOMC_SFO.tc, data = FALSE) #' +#' # We can easily use starting parameters from the parent only fit (only for illustration) +#' fit.FOMC = mkinfit("FOMC", FOCUS_2006_D, quiet = TRUE, error_model = "tc") +#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_D, quiet = TRUE, +#' parms.ini = fit.FOMC$bparms.ode, error_model = "tc") #' #' @export mkinfit <- function(mkinmod, observed, @@ -716,6 +718,7 @@ mkinfit <- function(mkinmod, observed, errparms <- fit_direct$par[errparms_index] } else { cost.current <- Inf # reset to avoid conflict with the OLS step + data_direct <- current_data # We need this later if it was better } } if (error_model_algorithm != "direct") { @@ -785,6 +788,7 @@ mkinfit <- function(mkinmod, observed, fit$d_3_message <- d_3_messages["direct"] degparms <- fit$par[degparms_index] errparms <- fit$par[errparms_index] + current_data <- data_direct } } } @@ -914,6 +918,10 @@ mkinfit <- function(mkinmod, observed, fit$errparms <- errparms fit$df.residual <- n_observed - length(c(degparms, errparms)) + # Check for normal distribution of residuals + fit$shapiro.p <- shapiro.test(residuals.mkinfit(fit, standardized = TRUE))$p.value + if (fit$shapiro.p < 0.05) warning("The p-value for the Shapiro-Wilk test of normality on standardized residuals is < 0.05") + fit$date <- date() fit$version <- as.character(utils::packageVersion("mkin")) fit$Rversion <- paste(R.version$major, R.version$minor, sep=".") diff --git a/R/summary.mkinfit.R b/R/summary.mkinfit.R index 2dc74bd7..2c291ffd 100644 --- a/R/summary.mkinfit.R +++ b/R/summary.mkinfit.R @@ -165,6 +165,7 @@ summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, alpha = 0.05, } ans$bparms.ode <- object$bparms.ode + ans$shapiro.p <- object$shapiro.p ep <- endpoints(object) if (length(ep$ff) != 0) ans$ff <- ep$ff @@ -228,6 +229,10 @@ print.summary.mkinfit <- function(x, digits = max(3, getOption("digits") - 3), . row.names = " ")) } + if (!is.null(x$shapiro.p) && x$shapiro.p < 0.05) { + warning("The p-value for the Shapiro-Wilk test of normality on standardized residuals is < 0.05") + } + cat("\nOptimised, transformed parameters with symmetric confidence intervals:\n") print(signif(x$par, digits = digits)) -- cgit v1.2.1