aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
authorJohannes Ranke <jranke@uni-bremen.de>2020-05-29 15:03:04 +0200
committerJohannes Ranke <jranke@uni-bremen.de>2020-05-29 15:03:04 +0200
commit510436646b1bdd5b8cfab70be29334bd3cc9c828 (patch)
tree4b3e26f658e822e18d09e2d939a5c89214566b6d /R
parent609bfe2fd7ecbdcad5f5d641f0db51541dcd6a4e (diff)
Warn if standardized residuals are unlikely normal
This revealed a bug in the data returned in mkinfit$data in the case of the d_3 algorithm, which also affected the residual plot - the data from the direct fitting was not returned even if this was the better method.
Diffstat (limited to 'R')
-rw-r--r--R/mkinfit.R100
-rw-r--r--R/summary.mkinfit.R5
2 files changed, 59 insertions, 46 deletions
diff --git a/R/mkinfit.R b/R/mkinfit.R
index f9cb3304..d7b1b7f4 100644
--- a/R/mkinfit.R
+++ b/R/mkinfit.R
@@ -98,7 +98,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' @param quiet Suppress printing out the current value of the negative
#' log-likelihood after each improvement?
#' @param atol Absolute error tolerance, passed to [deSolve::ode()]. Default
-#' is 1e-8, which is lower than the default in the [deSolve::lsoda()]
+#' is 1e-8, which is lower than the default in the [deSolve::lsoda()]
#' function which is used per default.
#' @param rtol Absolute error tolerance, passed to [deSolve::ode()]. Default
#' is 1e-10, much lower than in [deSolve::lsoda()].
@@ -119,7 +119,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' least squares fitting ("OLS") is selected. If the error model is "obs", or
#' "tc", the "d_3" algorithm is selected.
#'
-#' The algorithm "d_3" will directly minimize the negative log-likelihood
+#' The algorithm "d_3" will directly minimize the negative log-likelihood
#' and independently also use the three step algorithm described below.
#' The fit with the higher likelihood is returned.
#'
@@ -150,7 +150,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' @param trace_parms Should a trace of the parameter values be listed?
#' @param \dots Further arguments that will be passed on to
#' [deSolve::ode()].
-#' @importFrom stats nlminb aggregate dist
+#' @importFrom stats nlminb aggregate dist shapiro.test
#' @return A list with "mkinfit" in the class attribute.
#' @note When using the "IORE" submodel for metabolites, fitting with
#' "transform_rates = TRUE" (the default) often leads to failures of the
@@ -168,7 +168,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' for measurement error in analytical chemistry. *Technometrics* 37(2), 176-184.
#'
#' Ranke J and Meinecke S (2019) Error Models for the Kinetic Evaluation of Chemical
-#' Degradation Data. *Environments* 6(12) 124
+#' Degradation Data. *Environments* 6(12) 124
#' [doi:10.3390/environments6120124](https://doi.org/10.3390/environments6120124).
#' @examples
#'
@@ -177,60 +177,62 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
#' summary(fit)
#'
#' # One parent compound, one metabolite, both single first order.
+#' # We remove zero values from FOCUS dataset D in order to avoid warnings
+#' FOCUS_D <- subset(FOCUS_2006_D, value != 0)
#' # Use mkinsub for convenience in model formulation. Pathway to sink included per default.
#' SFO_SFO <- mkinmod(
#' parent = mkinsub("SFO", "m1"),
#' m1 = mkinsub("SFO"))
-#' # Fit the model to the FOCUS example dataset D using defaults
-#' print(system.time(fit <- mkinfit(SFO_SFO, FOCUS_2006_D,
-#' solution_type = "eigen", quiet = TRUE)))
-#' parms(fit)
-#' endpoints(fit)
+#'
+#' # Fit the model quietly to the FOCUS example dataset D using defaults
+#' fit <- mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE)
+#' # Since mkin 0.9.50.3, we get a warning about non-normality of residuals,
+#' # so we try an alternative error model
+#' fit.tc <- mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "tc")
+#' # This avoids the warning, and the likelihood ratio test confirms it is preferable
+#' lrtest(fit.tc, fit)
+#' # We can also allow for different variances of parent and metabolite as error model
+#' fit.obs <- mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "obs")
+#' # This also avoids the warning about non-normality, but the two-component error model
+#' # has significantly higher likelihood
+#' lrtest(fit.obs, fit.tc)
+#' parms(fit.tc)
+#' endpoints(fit.tc)
+#'
+#' # We can show a quick (only one replication) benchmark for this case, as we
+#' # have several alternative solution methods for the model. We skip
+#' # uncompiled deSolve, as it is so slow. More benchmarks are found in the
+#' # benchmark vignette
#' \dontrun{
-#' # deSolve is slower when no C compiler (gcc) was available during model generation
-#' print(system.time(fit.deSolve <- mkinfit(SFO_SFO, FOCUS_2006_D,
-#' solution_type = "deSolve")))
-#' parms(fit.deSolve)
-#' endpoints(fit.deSolve)
+#' if(require(rbenchmark)) {
+#' benchmark(replications = 1, order = "relative", columns = c("test", "relative", "elapsed"),
+#' deSolve_compiled = mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "tc",
+#' solution_type = "deSolve", use_compiled = TRUE),
+#' eigen = mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "tc",
+#' solution_type = "eigen"),
+#' analytical = mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "tc",
+#' solution_type = "analytical"))
+#' }
#' }
#'
-#' # Use stepwise fitting, using optimised parameters from parent only fit, FOMC
+#' # Use stepwise fitting, using optimised parameters from parent only fit, FOMC-SFO
#' \dontrun{
#' FOMC_SFO <- mkinmod(
#' parent = mkinsub("FOMC", "m1"),
#' m1 = mkinsub("SFO"))
-#' # Fit the model to the FOCUS example dataset D using defaults
-#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, quiet = TRUE)
-#' # Use starting parameters from parent only FOMC fit
-#' fit.FOMC = mkinfit("FOMC", FOCUS_2006_D, quiet = TRUE)
-#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, quiet = TRUE,
-#' parms.ini = fit.FOMC$bparms.ode)
-#'
-#' # Use stepwise fitting, using optimised parameters from parent only fit, SFORB
-#' SFORB_SFO <- mkinmod(
-#' parent = list(type = "SFORB", to = "m1", sink = TRUE),
-#' m1 = list(type = "SFO"))
-#' # Fit the model to the FOCUS example dataset D using defaults
-#' fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, quiet = TRUE)
-#' fit.SFORB_SFO.deSolve <- mkinfit(SFORB_SFO, FOCUS_2006_D, solution_type = "deSolve",
-#' quiet = TRUE)
-#' # Use starting parameters from parent only SFORB fit (not really needed in this case)
-#' fit.SFORB = mkinfit("SFORB", FOCUS_2006_D, quiet = TRUE)
-#' fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, parms.ini = fit.SFORB$bparms.ode, quiet = TRUE)
-#' }
-#'
-#' \dontrun{
-#' # Weighted fits, including IRLS (error_model = "obs")
-#' SFO_SFO.ff <- mkinmod(parent = mkinsub("SFO", "m1"),
-#' m1 = mkinsub("SFO"), use_of_ff = "max")
-#' f.noweight <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, quiet = TRUE)
-#' summary(f.noweight)
-#' f.obs <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, error_model = "obs", quiet = TRUE)
-#' summary(f.obs)
-#' f.tc <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, error_model = "tc", quiet = TRUE)
-#' summary(f.tc)
-#' }
+#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_D, quiet = TRUE)
+#' # Again, we get a warning and try a more sophisticated error model
+#' fit.FOMC_SFO.tc <- mkinfit(FOMC_SFO, FOCUS_D, quiet = TRUE, error_model = "tc")
+#' # This model has a higher likelihood, but not significantly so
+#' lrtest(fit.tc, fit.FOMC_SFO.tc)
+#' # Also, the missing standard error for log_beta and the t-tests for alpha
+#' # and beta indicate overparameterisation
+#' summary(fit.FOMC_SFO.tc, data = FALSE)
#'
+#' # We can easily use starting parameters from the parent only fit (only for illustration)
+#' fit.FOMC = mkinfit("FOMC", FOCUS_2006_D, quiet = TRUE, error_model = "tc")
+#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_D, quiet = TRUE,
+#' parms.ini = fit.FOMC$bparms.ode, error_model = "tc")
#'
#' @export
mkinfit <- function(mkinmod, observed,
@@ -716,6 +718,7 @@ mkinfit <- function(mkinmod, observed,
errparms <- fit_direct$par[errparms_index]
} else {
cost.current <- Inf # reset to avoid conflict with the OLS step
+ data_direct <- current_data # We need this later if it was better
}
}
if (error_model_algorithm != "direct") {
@@ -785,6 +788,7 @@ mkinfit <- function(mkinmod, observed,
fit$d_3_message <- d_3_messages["direct"]
degparms <- fit$par[degparms_index]
errparms <- fit$par[errparms_index]
+ current_data <- data_direct
}
}
}
@@ -914,6 +918,10 @@ mkinfit <- function(mkinmod, observed,
fit$errparms <- errparms
fit$df.residual <- n_observed - length(c(degparms, errparms))
+ # Check for normal distribution of residuals
+ fit$shapiro.p <- shapiro.test(residuals.mkinfit(fit, standardized = TRUE))$p.value
+ if (fit$shapiro.p < 0.05) warning("The p-value for the Shapiro-Wilk test of normality on standardized residuals is < 0.05")
+
fit$date <- date()
fit$version <- as.character(utils::packageVersion("mkin"))
fit$Rversion <- paste(R.version$major, R.version$minor, sep=".")
diff --git a/R/summary.mkinfit.R b/R/summary.mkinfit.R
index 2dc74bd7..2c291ffd 100644
--- a/R/summary.mkinfit.R
+++ b/R/summary.mkinfit.R
@@ -165,6 +165,7 @@ summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, alpha = 0.05,
}
ans$bparms.ode <- object$bparms.ode
+ ans$shapiro.p <- object$shapiro.p
ep <- endpoints(object)
if (length(ep$ff) != 0)
ans$ff <- ep$ff
@@ -228,6 +229,10 @@ print.summary.mkinfit <- function(x, digits = max(3, getOption("digits") - 3), .
row.names = " "))
}
+ if (!is.null(x$shapiro.p) && x$shapiro.p < 0.05) {
+ warning("The p-value for the Shapiro-Wilk test of normality on standardized residuals is < 0.05")
+ }
+
cat("\nOptimised, transformed parameters with symmetric confidence intervals:\n")
print(signif(x$par, digits = digits))

Contact - Imprint