diff options
| author | Johannes Ranke <jranke@uni-bremen.de> | 2019-10-21 12:11:34 +0200 | 
|---|---|---|
| committer | Johannes Ranke <jranke@uni-bremen.de> | 2019-10-21 12:11:34 +0200 | 
| commit | 7624a2b8398b4ad665a3b0b622488e1893a5ee7c (patch) | |
| tree | 30e5bc32adc77de6540e68fa80a157f893c7770d | |
| parent | 8ce251e5ee619a240da2381eda58bc94a554ca37 (diff) | |
Refactor mkinfit, infrastructure work
mkinfit objects now include an ll() function to calculate the
log-likelihood. Part of the code was refactored, hopefully making it
easier to read and maintain. IRLS is currently the default algorithm for
the error model "obs", for no particular reason. This may be subject
to change when I get around to investigate.
Slow tests are now in a separate subdirectory and will probably
only be run by my own Makefile target.
Formatting of test logs is improved.
Roundtripping error model parameters works with a precision of 10% when
we use lots of replicates in the synthetic data (see slow tests). This
is not new in this commit, but as I think it is reasonable this
closes #7.
| -rw-r--r-- | .Rbuildignore | 1 | ||||
| -rw-r--r-- | DESCRIPTION | 2 | ||||
| -rw-r--r-- | GNUmakefile | 4 | ||||
| -rw-r--r-- | NEWS.md | 8 | ||||
| -rw-r--r-- | R/mkinerrmin.R | 7 | ||||
| -rw-r--r-- | R/mkinfit.R | 328 | ||||
| -rw-r--r-- | check.log | 31 | ||||
| -rw-r--r-- | docs/news/index.html | 5 | ||||
| -rw-r--r-- | docs/reference/mkinfit.html | 172 | ||||
| -rw-r--r-- | man/mkinfit.Rd | 18 | ||||
| -rw-r--r-- | test.log | 42 | ||||
| -rw-r--r-- | tests/testthat/DFOP_FOCUS_C_messages.txt | 2 | ||||
| -rw-r--r-- | tests/testthat/FOCUS_2006_D.csf | 2 | ||||
| -rw-r--r-- | tests/testthat/slow/test_parent_only.R (renamed from tests/testthat/test_parent_only.R) | 0 | ||||
| -rw-r--r-- | tests/testthat/slow/test_roundtrip_error_parameters.R | 141 | ||||
| -rw-r--r-- | tests/testthat/summary_DFOP_FOCUS_C.txt | 12 | ||||
| -rw-r--r-- | tests/testthat/test_confidence.R | 51 | ||||
| -rw-r--r-- | tests/testthat/test_error_models.R | 138 | ||||
| -rw-r--r-- | tests_slow.log | 11 | 
19 files changed, 535 insertions, 440 deletions
| diff --git a/.Rbuildignore b/.Rbuildignore index ab124258..1434847a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,6 +2,7 @@  ^build.log$  ^check.log$  ^test.log$ +^tests_slow.log$  ^test.R$  ^README.html$  ^mkin.Rcheck diff --git a/DESCRIPTION b/DESCRIPTION index bc8e61d6..9efa2b89 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: mkin  Type: Package  Title: Kinetic Evaluation of Chemical Degradation Data  Version: 0.9.49.6 -Date: 2019-09-02 +Date: 2019-10-19  Authors@R: c(person("Johannes", "Ranke", role = c("aut", "cre", "cph"),                      email = "jranke@uni-bremen.de",                      comment = c(ORCID = "0000-0003-4371-6538")), diff --git a/GNUmakefile b/GNUmakefile index 6b491993..0141b54c 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -72,6 +72,10 @@ test: install  	"$(RBIN)/Rscript" -e 'devtools::test()' 2>&1 | tee test.log  	sed -i -e "s/\r.*\r//" test.log +slowtests: install +	NOT_CRAN=true "$(RBIN)/Rscript" -e 'library(mkin); testthat::test_dir("tests/testthat/slow")' 2>&1 | tee tests_slow.log +	sed -i -e "s/\r.*\r//" tests_slow.log +  testcheck: test check  README.html: README.md @@ -1,5 +1,11 @@  # mkin 0.9.49.6 (unreleased) +- Move long-running tests to tests/testthat/slow with a separate test log. They currently take around 7 minutes on my system + +- 'mkinfit': Clean the code and return functions to calculate the log-likelihood and the sum of squared residuals + +- 'mkinfit': The default algorithm for fitting the 'obs' error model is now IRLS +  - Vignette 'twa.html': Add the maximum time weighted average formulas for the hockey stick model  - Support frameless plots ('frame = FALSE') @@ -42,7 +48,7 @@  - Add the function 'logLik.mkinfit' which makes it possible to calculate an AIC for mkinfit objects -- Add the function 'AIC.mmkin' to makeqit easy to compare columns of mmkin objects +- Add the function 'AIC.mmkin' to make it easy to compare columns of mmkin objects  - 'add_err': Respect the argument giving the number of replicates in the synthetic dataset diff --git a/R/mkinerrmin.R b/R/mkinerrmin.R index c0c6fad7..ce4877d2 100644 --- a/R/mkinerrmin.R +++ b/R/mkinerrmin.R @@ -1,4 +1,4 @@ -# Copyright (C) 2010-2014 Johannes Ranke +# Copyright (C) 2010-2019 Johannes Ranke  # Contact: jranke@uni-bremen.de  # This file is part of the R package mkin @@ -62,9 +62,8 @@ mkinerrmin <- function(fit, alpha = 0.05)      n.k.optim <- n.k.optim + length(grep(paste("^log_k", obs_var, sep="_"),                                           names(parms.optim)))      n.k__iore.optim <- length(grep(paste("^k__iore", obs_var, sep="_"), names(parms.optim))) -    n.k__iore.optim <- n.k__iore.optim + length(grep(paste("^log_k__iore", obs_var, -							 sep = "_"), -						   names(parms.optim))) +    n.k__iore.optim <- n.k__iore.optim + length(grep(paste("^log_k__iore", +          obs_var, sep = "_"), names(parms.optim)))      n.N.optim <- length(grep(paste("^N", obs_var, sep="_"), names(parms.optim))) diff --git a/R/mkinfit.R b/R/mkinfit.R index b5e69e67..7e2b8cac 100644 --- a/R/mkinfit.R +++ b/R/mkinfit.R @@ -34,7 +34,7 @@ mkinfit <- function(mkinmod, observed,    quiet = FALSE,
    atol = 1e-8, rtol = 1e-10, n.outtimes = 100,
    error_model = c("const", "obs", "tc"),
 -  error_model_algorithm = c("d_3", "direct", "twostep", "threestep", "fourstep", "IRLS", "OLS"),
 +  error_model_algorithm = c("auto", "d_3", "direct", "twostep", "threestep", "fourstep", "IRLS", "OLS"),
    reweight.tol = 1e-8, reweight.max.iter = 10,
    trace_parms = FALSE,
    ...)
 @@ -244,13 +244,21 @@ mkinfit <- function(mkinmod, observed,      }
    }
 -  # Get the error model
 +  # Get the error model and the algorithm for fitting
    err_mod <- match.arg(error_model)
    error_model_algorithm = match.arg(error_model_algorithm)
 +  if (error_model_algorithm == "OLS") {
 +    if (err_mod != "const") stop("OLS is only appropriate for constant variance")
 +  }
 +  if (error_model_algorithm == "auto") {
 +    error_model_algorithm = switch(err_mod,
 +      const = "OLS", obs = "IRLS", tc = "d_3")
 +  }
    errparm_names <- switch(err_mod,
      "const" = "sigma",
      "obs" = paste0("sigma_", obs_vars),
      "tc" = c("sigma_low", "rsd_high"))
 +  errparm_names_optim <- if (error_model_algorithm == "OLS") NULL else errparm_names
    # Define starting values for the error model
    if (err.ini[1] != "auto") {
 @@ -271,6 +279,11 @@ mkinfit <- function(mkinmod, observed,      }
      names(errparms) <- errparm_names
    }
 +  if (error_model_algorithm == "OLS") {
 +    errparms_optim <- NULL
 +  } else {
 +    errparms_optim <- errparms
 +  }
    # Define outtimes for model solution.
    # Include time points at which observed data are available
 @@ -278,49 +291,51 @@ mkinfit <- function(mkinmod, observed,                                                max(observed$time),
                                                length.out = n.outtimes))))
 -  # Define log-likelihood function for optimisation, including (back)transformations
 -  nlogLik <- function(P, trans = TRUE, OLS = FALSE, fixed_degparms = FALSE, fixed_errparms = FALSE, update_data = TRUE, ...)
 +  # Define the objective function for optimisation, including (back)transformations
 +  cost_function <- function(P, trans = TRUE, OLS = FALSE, fixed_degparms = FALSE, fixed_errparms = FALSE, update_data = TRUE, ...)
    {
      assign("calls", calls + 1, inherits = TRUE) # Increase the model solution counter
      # Trace parameter values if requested and if we are actually optimising
      if(trace_parms & update_data) cat(P, "\n")
 +    # Determine local parameter values for the cost estimation
      if (is.numeric(fixed_degparms)) {
 -      degparms <- fixed_degparms
 -      errparms <- P # This version of errparms is local to the function
 +      cost_degparms <- fixed_degparms
 +      cost_errparms <- P
        degparms_fixed = TRUE
      } else {
        degparms_fixed = FALSE
      }
      if (is.numeric(fixed_errparms)) {
 -      degparms <- P
 -      errparms <- fixed_errparms # Local to the function
 +      cost_degparms <- P
 +      cost_errparms <- fixed_errparms
        errparms_fixed = TRUE
      } else {
        errparms_fixed = FALSE
      }
      if (OLS) {
 -      degparms <- P
 +      cost_degparms <- P
 +      cost_errparms <- numeric(0)
      }
      if (!OLS & !degparms_fixed & !errparms_fixed) {
 -      degparms <- P[1:(length(P) - length(errparms))]
 -      errparms <- P[(length(degparms) + 1):length(P)]
 +      cost_degparms <- P[1:(length(P) - length(errparms))]
 +      cost_errparms <- P[(length(cost_degparms) + 1):length(P)]
      }
      # Initial states for t0
      if(length(state.ini.optim) > 0) {
 -      odeini <- c(degparms[1:length(state.ini.optim)], state.ini.fixed)
 +      odeini <- c(cost_degparms[1:length(state.ini.optim)], state.ini.fixed)
        names(odeini) <- c(state.ini.optim.boxnames, state.ini.fixed.boxnames)
      } else {
        odeini <- state.ini.fixed
        names(odeini) <- state.ini.fixed.boxnames
      }
 -    odeparms.optim <- degparms[(length(state.ini.optim) + 1):length(degparms)]
 +    odeparms.optim <- cost_degparms[(length(state.ini.optim) + 1):length(cost_degparms)]
      if (trans == TRUE) {
        odeparms <- c(odeparms.optim, transparms.fixed)
 @@ -342,53 +357,55 @@ mkinfit <- function(mkinmod, observed,      out_long <- mkin_wide_to_long(out, time = "time")
      if (err_mod == "const") {
 -      observed$std <- errparms["sigma"]
 +      observed$std <- if (OLS) NA else cost_errparms["sigma"]
      }
      if (err_mod == "obs") {
        std_names <- paste0("sigma_", observed$name)
 -      observed$std <- errparms[std_names]
 +      observed$std <- cost_errparms[std_names]
      }
      if (err_mod == "tc") {
        tmp <- merge(observed, out_long, by = c("time", "name"))
        tmp$name <- ordered(tmp$name, levels = obs_vars)
        tmp <- tmp[order(tmp$name, tmp$time), ]
 -      observed$std <- sqrt(errparms["sigma_low"]^2 + tmp$value.y^2 * errparms["rsd_high"]^2)
 +      observed$std <- sqrt(cost_errparms["sigma_low"]^2 + tmp$value.y^2 * cost_errparms["rsd_high"]^2)
      }
 -    data_log_lik <- merge(observed[c("name", "time", "value", "std")], out_long,
 +    cost_data <- merge(observed[c("name", "time", "value", "std")], out_long,
                           by = c("name", "time"), suffixes = c(".observed", ".predicted"))
      if (OLS) {
 -      nlogLik <- with(data_log_lik, sum((value.observed - value.predicted)^2))
 +      # Cost is the sum of squared residuals
 +      cost <- with(cost_data, sum((value.observed - value.predicted)^2))
      } else {
 -      nlogLik <- - with(data_log_lik,
 +      # Cost is the negative log-likelihood
 +      cost <- - with(cost_data,
          sum(dnorm(x = value.observed, mean = value.predicted, sd = std, log = TRUE)))
      }
 -    # We update the current likelihood and data during the optimisation, not
 +    # We update the current cost and data during the optimisation, not
      # during hessian calculations
      if (update_data) {
        assign("out_predicted", out_long, inherits = TRUE)
 -      assign("data_errmod", data_log_lik, inherits = TRUE)
 +      assign("current_data", cost_data, inherits = TRUE)
 -      if (nlogLik < nlogLik.current) {
 -        assign("nlogLik.current", nlogLik, inherits = TRUE)
 +      if (cost < cost.current) {
 +        assign("cost.current", cost, inherits = TRUE)
          if (!quiet) cat(ifelse(OLS, "Sum of squared residuals", "Negative log-likelihood"),
 -                        " at call ", calls, ": ", nlogLik.current, "\n", sep = "")
 +                        " at call ", calls, ": ", cost.current, "\n", sep = "")
        }
      }
 -    return(nlogLik)
 +    return(cost)
    }
 -  n_optim <- length(c(state.ini.optim, transparms.optim, errparm_names))
    names_optim <- c(names(state.ini.optim),
                     names(transparms.optim),
 -                   errparm_names)
 +                   errparm_names_optim)
 +  n_optim <- length(names_optim)
    # Define lower and upper bounds other than -Inf and Inf for parameters
    # for which no internal transformation is requested in the call to mkinfit
 -  # and for error model parameters
 +  # and for optimised error model parameters
    lower <- rep(-Inf, n_optim)
    upper <- rep(Inf, n_optim)
    names(lower) <- names(upper) <- names_optim
 @@ -416,7 +433,9 @@ mkinfit <- function(mkinmod, observed,    }
    if (err_mod == "const") {
 -    lower["sigma"] <- 0
 +    if (error_model_algorithm != "OLS") {
 +      lower["sigma"] <- 0
 +    }
    }
    if (err_mod == "obs") {
      index_sigma <- grep("^sigma_", names(lower))
 @@ -427,11 +446,11 @@ mkinfit <- function(mkinmod, observed,      lower["rsd_high"] <- 0
    }
 -  # Counter for likelihood evaluations
 +  # Counter for cost function evaluations
    calls = 0
 -  nlogLik.current <- Inf
 +  cost.current <- Inf
    out_predicted <- NA
 -  data_errmod <- NA
 +  current_data <- NA
    # Show parameter names if tracing is requested
    if(trace_parms) cat(names_optim, "\n")
 @@ -441,132 +460,127 @@ mkinfit <- function(mkinmod, observed,    # Do the fit and take the time until the hessians are calculated
    fit_time <- system.time({
      degparms <- c(state.ini.optim, transparms.optim)
 -
 -    if (err_mod == "const") {
 -      error_model_algorithm = "OLS"
 +    n_degparms <- length(degparms)
 +    degparms_index <- seq(1, n_degparms)
 +    errparms_index <- seq(n_degparms + 1, length.out = length(errparms))
 +
 +    if (error_model_algorithm == "d_3") {
 +      if (!quiet) message("Directly optimising the complete model")
 +      parms.start <- c(degparms, errparms)
 +      fit_direct <- nlminb(parms.start, cost_function,
 +        lower = lower[names(parms.start)],
 +        upper = upper[names(parms.start)],
 +        control = control, ...)
 +      fit_direct$logLik <- - cost.current
 +      if (error_model_algorithm == "direct") {
 +        degparms <- fit_direct$par[degparms_index]
 +        errparms <- fit_direct$par[errparms_index]
 +      } else {
 +        cost.current <- Inf # reset to avoid conflict with the OLS step
 +      }
 +    }
 +    if (error_model_algorithm != "direct") {
        if (!quiet) message("Ordinary least squares optimisation")
 -      fit <- nlminb(degparms, nlogLik, control = control,
 +      fit <- nlminb(degparms, cost_function, control = control,
          lower = lower[names(degparms)],
          upper = upper[names(degparms)], OLS = TRUE, ...)
        degparms <- fit$par
        # Get the maximum likelihood estimate for sigma at the optimum parameter values
 -      data_errmod$residual <- data_errmod$value.observed - data_errmod$value.predicted
 -      sigma_mle <- sqrt(sum(data_errmod$residual^2)/nrow(data_errmod))
 +      current_data$residual <- current_data$value.observed - current_data$value.predicted
 +      sigma_mle <- sqrt(sum(current_data$residual^2)/nrow(current_data))
 -      errparms <- c(sigma = sigma_mle)
 -      nlogLik.current <- nlogLik(c(degparms, errparms), OLS = FALSE)
 -      fit$logLik <- - nlogLik.current
 -    } else {
 -      if (error_model_algorithm == "d_3") {
 -        if (!quiet) message("Directly optimising the complete model")
 -        parms.start <- c(degparms, errparms)
 -        fit_direct <- nlminb(parms.start, nlogLik,
 -          lower = lower[names(parms.start)],
 -          upper = upper[names(parms.start)],
 -          control = control, ...)
 -        fit_direct$logLik <- - nlogLik.current
 -        nlogLik.current <- Inf # reset to avoid conflict with the OLS step
 +      # Use that estimate for the constant variance, or as first guess if err_mod = "obs"
 +      if (err_mod != "tc") {
 +        errparms[names(errparms)] <- sigma_mle
        }
 -      if (error_model_algorithm != "direct") {
 -        if (!quiet) message("Ordinary least squares optimisation")
 -        fit <- nlminb(degparms, nlogLik, control = control,
 -          lower = lower[names(degparms)],
 -          upper = upper[names(degparms)], OLS = TRUE, ...)
 -        degparms <- fit$par
 -        # Get the maximum likelihood estimate for sigma at the optimum parameter values
 -        data_errmod$residual <- data_errmod$value.observed - data_errmod$value.predicted
 -        sigma_mle <- sqrt(sum(data_errmod$residual^2)/nrow(data_errmod))
 +      fit$par <- c(fit$par, errparms)
 +
 +      cost.current <- cost_function(c(degparms, errparms), OLS = FALSE)
 +      fit$logLik <- - cost.current
 +    }
 +    if (error_model_algorithm %in% c("threestep", "fourstep", "d_3")) {
 +      if (!quiet) message("Optimising the error model")
 +      fit <- nlminb(errparms, cost_function, control = control,
 +        lower = lower[names(errparms)],
 +        upper = upper[names(errparms)],
 +        fixed_degparms = degparms, ...)
 +      errparms <- fit$par
 +    }
 +    if (error_model_algorithm == "fourstep") {
 +      if (!quiet) message("Optimising the degradation model")
 +      fit <- nlminb(degparms, cost_function, control = control,
 +        lower = lower[names(degparms)],
 +        upper = upper[names(degparms)],
 +        fixed_errparms = errparms, ...)
 +      degparms <- fit$par
 +    }
 +    if (error_model_algorithm %in%
 +      c("direct", "twostep", "threestep", "fourstep", "d_3")) {
 +      if (!quiet) message("Optimising the complete model")
 +      parms.start <- c(degparms, errparms)
 +      fit <- nlminb(parms.start, cost_function,
 +        lower = lower[names(parms.start)],
 +        upper = upper[names(parms.start)],
 +        control = control, ...)
 +      degparms <- fit$par[degparms_index]
 +      errparms <- fit$par[errparms_index]
 +      fit$logLik <- - cost.current
 -        nlogLik.current <- nlogLik(c(degparms, errparms), OLS = FALSE)
 -        fit$logLik <- - nlogLik.current
 +      if (error_model_algorithm == "d_3") {
 +        d_3_messages = c(
 +           same = "Direct fitting and three-step fitting yield approximately the same likelihood",
 +           threestep = "Three-step fitting yielded a higher likelihood than direct fitting",
 +           direct = "Direct fitting yielded a higher likelihood than three-step fitting")
 +        rel_diff <- abs((fit_direct$logLik - fit$logLik))/-mean(c(fit_direct$logLik, fit$logLik))
 +        if (rel_diff < 0.0001) {
 +          if (!quiet) message(d_3_messages["same"])
 +          fit$d_3_message <- d_3_messages["same"]
 +        } else {
 +          if (fit$logLik > fit_direct$logLik) {
 +            if (!quiet) message(d_3_messages["threestep"])
 +            fit$d_3_message <- d_3_messages["threestep"]
 +          } else {
 +            if (!quiet) message(d_3_messages["direct"])
 +            fit <- fit_direct
 +            fit$d_3_message <- d_3_messages["direct"]
 +          }
 +        }
        }
 -      if (error_model_algorithm %in% c("threestep", "fourstep", "d_3")) {
 +    }
 +    if (err_mod != "const" & error_model_algorithm == "IRLS") {
 +      reweight.diff <- 1
 +      n.iter <- 0
 +      errparms_last <- errparms
 +
 +      while (reweight.diff > reweight.tol &
 +             n.iter < reweight.max.iter) {
 +
          if (!quiet) message("Optimising the error model")
 -        fit <- nlminb(errparms, nlogLik, control = control,
 +        fit <- nlminb(errparms, cost_function, control = control,
            lower = lower[names(errparms)],
            upper = upper[names(errparms)],
            fixed_degparms = degparms, ...)
          errparms <- fit$par
 -      }
 -      if (error_model_algorithm == "fourstep") {
 +
          if (!quiet) message("Optimising the degradation model")
 -        fit <- nlminb(degparms, nlogLik, control = control,
 +        fit <- nlminb(degparms, cost_function, control = control,
            lower = lower[names(degparms)],
            upper = upper[names(degparms)],
            fixed_errparms = errparms, ...)
          degparms <- fit$par
 -      }
 -      if (error_model_algorithm %in% c("direct", "twostep", "threestep",
 -                                       "fourstep", "d_3")) {
 -        if (!quiet) message("Optimising the complete model")
 -        parms.start <- c(degparms, errparms)
 -        fit <- nlminb(parms.start, nlogLik,
 -          lower = lower[names(parms.start)],
 -          upper = upper[names(parms.start)],
 -          control = control, ...)
 -        fit$logLik <- - nlogLik.current
 -        d_3_messages = c(
 -           same = "Direct fitting and three-step fitting yield approximately the same likelihood",
 -           threestep = "Three-step fitting yielded a higher likelihood than direct fitting",
 -           direct = "Direct fitting yielded a higher likelihood than three-step fitting")
 -        if (error_model_algorithm == "d_3") {
 -          rel_diff <- abs((fit_direct$logLik - fit$logLik))/-mean(c(fit_direct$logLik, fit$logLik))
 -          if (rel_diff < 0.0001) {
 -            if (!quiet) message(d_3_messages["same"])
 -            fit$d_3_message <- d_3_messages["same"]
 -          } else {
 -            if (fit$logLik > fit_direct$logLik) {
 -              if (!quiet) message(d_3_messages["threestep"])
 -              fit$d_3_message <- d_3_messages["threestep"]
 -            } else {
 -              if (!quiet) message(d_3_messages["direct"])
 -              fit <- fit_direct
 -              fit$d_3_message <- d_3_messages["direct"]
 -            }
 -          }
 -        }
 -      }
 -      if (err_mod != "const" & error_model_algorithm == "IRLS") {
 -        reweight.diff <- 1
 -        n.iter <- 0
 +        reweight.diff <- dist(rbind(errparms, errparms_last))
          errparms_last <- errparms
 -        while (reweight.diff > reweight.tol &
 -               n.iter < reweight.max.iter) {
 -
 -          if (!quiet) message("Optimising the error model")
 -          fit <- nlminb(errparms, nlogLik, control = control,
 -            lower = lower[names(errparms)],
 -            upper = upper[names(errparms)],
 -            fixed_degparms = degparms, ...)
 -          errparms <- fit$par
 -
 -          if (!quiet) message("Optimising the degradation model")
 -          fit <- nlminb(degparms, nlogLik, control = control,
 -            lower = lower[names(degparms)],
 -            upper = upper[names(degparms)],
 -            fixed_errparms = errparms, ...)
 -          degparms <- fit$par
 -
 -          reweight.diff <- dist(rbind(errparms, errparms_last))
 -          errparms_last <- errparms
 -
 -          fit$par <- c(fit$par, errparms)
 -          nlogLik.current <- nlogLik(c(degparms, errparms), OLS = FALSE)
 -          fit$logLik <- - nlogLik.current
 -        }
 +        fit$par <- c(fit$par, errparms)
 +        cost.current <- cost_function(c(degparms, errparms), OLS = FALSE)
 +        fit$logLik <- - cost.current
        }
      }
 -    fit$error_model_algorithm <- error_model_algorithm
 -    # We include the error model in the parameter uncertainty analysis, also
 -    # for constant variance, to get a confidence interval for it
 -    if (err_mod == "const") {
 -      fit$par <- c(fit$par, sigma = sigma_mle)
 -    }
 -    fit$hessian <- try(numDeriv::hessian(nlogLik, fit$par, update_data = FALSE), silent = TRUE)
 +    fit$hessian <- try(numDeriv::hessian(cost_function, c(degparms, errparms), OLS = FALSE,
 +        update_data = FALSE), silent = TRUE)
      # Backtransform parameters
      bparms.optim = backtransform_odeparms(fit$par, mkinmod,
 @@ -575,10 +589,12 @@ mkinfit <- function(mkinmod, observed,      bparms.fixed = c(state.ini.fixed, parms.fixed)
      bparms.all = c(bparms.optim, parms.fixed)
 -    fit$hessian_notrans <- try(numDeriv::hessian(nlogLik, c(bparms.optim, fit$par[names(errparms)]),
 -                                       trans = FALSE, update_data = FALSE), silent = TRUE)
 +    fit$hessian_notrans <- try(numDeriv::hessian(cost_function, c(bparms.all, errparms),
 +        OLS = FALSE, trans = FALSE, update_data = FALSE), silent = TRUE)
    })
 +  fit$error_model_algorithm <- error_model_algorithm
 +
    if (fit$convergence != 0) {
      fit$warning = paste0("Optimisation did not converge:\n", fit$message)
      warning(fit$warning)
 @@ -604,18 +620,24 @@ mkinfit <- function(mkinmod, observed,    fit$obs_vars <- obs_vars
    fit$predicted <- out_predicted
 -  # Attach the negative log-likelihood function for post-hoc parameter uncertainty analysis
 -  fit$nlogLik <- nlogLik
 +  # Residual sum of squares as a function of the fitted parameters
 +  fit$rss <- function(P) cost_function(P, OLS = TRUE, update_data = FALSE)
 +
 +  # Log-likelihood with possibility to fix degparms or errparms
 +  fit$ll <- function(P, fixed_degparms = FALSE, fixed_errparms = FALSE) {
 +    - cost_function(P, fixed_degparms = fixed_degparms,
 +      fixed_errparms = fixed_errparms, OLS = FALSE, update_data = FALSE)
 +  }
    # Collect initial parameter values in three dataframes
    fit$start <- data.frame(value = c(state.ini.optim,
 -                                    parms.optim, errparms))
 +                                    parms.optim, errparms_optim))
    fit$start$type = c(rep("state", length(state.ini.optim)),
                       rep("deparm", length(parms.optim)),
 -                     rep("error", length(errparms)))
 +                     rep("error", length(errparms_optim)))
    fit$start_transformed = data.frame(
 -      value = c(state.ini.optim, transparms.optim, errparms),
 +      value = c(state.ini.optim, transparms.optim, errparms_optim),
        lower = lower,
        upper = upper)
 @@ -624,14 +646,14 @@ mkinfit <- function(mkinmod, observed,                       rep("deparm", length(parms.fixed)))
    # Sort observed, predicted and residuals
 -  data_errmod$name <- ordered(data_errmod$name, levels = obs_vars)
 +  current_data$name <- ordered(current_data$name, levels = obs_vars)
 -  data <- data_errmod[order(data_errmod$name, data_errmod$time), ]
 +  ordered_data <- current_data[order(current_data$name, current_data$time), ]
 -  fit$data <- data.frame(time = data$time,
 -                         variable = data$name,
 -                         observed = data$value.observed,
 -                         predicted = data$value.predicted)
 +  fit$data <- data.frame(time = ordered_data$time,
 +                         variable = ordered_data$name,
 +                         observed = ordered_data$value.observed,
 +                         predicted = ordered_data$value.predicted)
    fit$data$residual <- fit$data$observed - fit$data$predicted
 @@ -649,8 +671,8 @@ mkinfit <- function(mkinmod, observed,                          state.ini.fixed)
    names(fit$bparms.state) <- gsub("_0$", "", names(fit$bparms.state))
 -  fit$errparms.optim <- fit$par[names(errparms)]
 -  fit$df.residual <- n_observed - n_optim
 +  fit$errparms <- errparms
 +  fit$df.residual <- n_observed - length(c(degparms, errparms))
    fit$date <- date()
    fit$version <- as.character(utils::packageVersion("mkin"))
 @@ -664,7 +686,7 @@ summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, alpha = 0.05,    param  <- object$par
    pnames <- names(param)
    bpnames <- names(object$bparms.optim)
 -  epnames <- names(object$errparms.optim)
 +  epnames <- names(object$errparms)
    p      <- length(param)
    mod_vars <- names(object$mkinmod$diffs)
    covar  <- try(solve(object$hessian), silent = TRUE)
 @@ -736,9 +758,9 @@ summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, alpha = 0.05,      use_of_ff = object$mkinmod$use_of_ff,
      error_model_algorithm = object$error_model_algorithm,
      df = c(p, rdf),
 -    cov.unscaled = covar,
 +    covar = covar,
 +    covar_notrans = covar_notrans,
      err_mod = object$err_mod,
 -    #cov.scaled = covar * resvar,
      niter = object$iterations,
      calls = object$calls,
      time = object$time,
 @@ -760,8 +782,8 @@ summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, alpha = 0.05,    ans$errmin <- mkinerrmin(object, alpha = 0.05)
    if (object$calls > 0) {
 -    if (!is.null(ans$cov.unscaled)){
 -      Corr <- cov2cor(ans$cov.unscaled)
 +    if (!is.null(ans$covar)){
 +      Corr <- cov2cor(ans$covar)
        rownames(Corr) <- colnames(Corr) <- rownames(ans$par)
        ans$Corr <- Corr
      } else {
 @@ -831,7 +853,7 @@ print.summary.mkinfit <- function(x, digits = max(3, getOption("digits") - 3), .    if (x$calls > 0) {
      cat("\nParameter correlation:\n")
 -    if (!is.null(x$cov.unscaled)){
 +    if (!is.null(x$covar)){
        print(x$Corr, digits = digits, ...)
      } else {
        cat("No covariance matrix")
 @@ -24,27 +24,9 @@ Maintainer: ‘Johannes Ranke <jranke@uni-bremen.de>’  * checking for future file timestamps ... OK  * checking ‘build’ directory ... OK  * checking DESCRIPTION meta-information ... OK -* checking top-level files ... WARNING -Conversion of ‘README.md’ failed: -[WARNING] This document format requires a nonempty <title> element. -  Please specify either ‘title’ or ‘pagetitle’ in the metadata. -  Falling back to ‘README’ -Could not fetch https://www.r-pkg.org/badges/version/mkin -HttpExceptionRequest Request { -  host                 = "www.r-pkg.org" -  port                 = 443 -  secure               = True -  requestHeaders       = [] -  path                 = "/badges/version/mkin" -  queryString          = "" -  method               = "GET" -  proxy                = Nothing -  rawBody              = False -  redirectCount        = 10 -  responseTimeout      = ResponseTimeoutDefault -  requestVersion       = HTTP/1.1 -} - ConnectionTimeout +* checking top-level files ... NOTE +Non-standard file/directory found at top level: +  ‘tests_slow.log’  * checking for left-over files ... OK  * checking index information ... OK  * checking package subdirectories ... OK @@ -76,7 +58,10 @@ HttpExceptionRequest Request {  * checking data for ASCII and uncompressed saves ... OK  * checking installed files from ‘inst/doc’ ... OK  * checking files in ‘vignettes’ ... OK -* checking examples ... OK +* checking examples ... NOTE +Examples with CPU or elapsed time > 5s +                             user system elapsed +synthetic_data_for_UBA_2014 9.778  0.239  10.019  * checking for unstated dependencies in ‘tests’ ... OK  * checking tests ... SKIPPED  * checking for unstated dependencies in vignettes ... OK @@ -85,7 +70,7 @@ HttpExceptionRequest Request {  * checking PDF version of manual ... OK  * DONE -Status: 1 WARNING +Status: 2 NOTEs  See    ‘/home/jranke/git/mkin/mkin.Rcheck/00check.log’  for details. diff --git a/docs/news/index.html b/docs/news/index.html index 058205de..d3bee7e2 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -131,6 +131,9 @@  <a href="#mkin-0-9-49-6-unreleased" class="anchor"></a>mkin 0.9.49.6 (unreleased)<small> Unreleased </small>  </h1>  <ul> +<li><p>Move long-running tests to tests/testthat/slow with a separate test log. They currently take around 7 minutes on my system</p></li> +<li><p>‘mkinfit’: Clean the code and return functions to calculate the log-likelihood and the sum of squared residuals</p></li> +<li><p>‘mkinfit’: The default algorithm for fitting the ‘obs’ error model is now IRLS</p></li>  <li><p>Vignette ‘twa.html’: Add the maximum time weighted average formulas for the hockey stick model</p></li>  <li><p>Support frameless plots (‘frame = FALSE’)</p></li>  <li><p>Support to suppress the chi2 error level (‘show_errmin = FALSE’) in ‘plot.mmkin’</p></li> @@ -164,7 +167,7 @@  </h1>  <ul>  <li><p>Add the function ‘logLik.mkinfit’ which makes it possible to calculate an AIC for mkinfit objects</p></li> -<li><p>Add the function ‘AIC.mmkin’ to makeqit easy to compare columns of mmkin objects</p></li> +<li><p>Add the function ‘AIC.mmkin’ to make it easy to compare columns of mmkin objects</p></li>  <li><p>‘add_err’: Respect the argument giving the number of replicates in the synthetic dataset</p></li>  <li><p>‘max_twa_parent’: Support maximum time weighted average concentration calculations for the hockey stick (HS) model</p></li>  <li><p>‘mkinpredict’: Make the function generic and create a method for mkinfit objects</p></li> diff --git a/docs/reference/mkinfit.html b/docs/reference/mkinfit.html index 0767d2f8..e07d1f13 100644 --- a/docs/reference/mkinfit.html +++ b/docs/reference/mkinfit.html @@ -167,8 +167,8 @@ Per default, parameters in the kinetic models are internally transformed in    <span class='kw'>quiet</span> <span class='kw'>=</span> <span class='fl'>FALSE</span>,    <span class='kw'>atol</span> <span class='kw'>=</span> <span class='fl'>1e-8</span>, <span class='kw'>rtol</span> <span class='kw'>=</span> <span class='fl'>1e-10</span>, <span class='kw'>n.outtimes</span> <span class='kw'>=</span> <span class='fl'>100</span>,    <span class='kw'>error_model</span> <span class='kw'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/c.html'>c</a></span>(<span class='st'>"const"</span>, <span class='st'>"obs"</span>, <span class='st'>"tc"</span>), -  <span class='kw'>error_model_algorithm</span> <span class='kw'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/c.html'>c</a></span>(<span class='st'>"d_3"</span>, <span class='st'>"direct"</span>, <span class='st'>"twostep"</span>, <span class='st'>"threestep"</span>, <span class='st'>"fourstep"</span>, <span class='st'>"IRLS"</span>, -                            <span class='st'>"OLS"</span>), +  <span class='kw'>error_model_algorithm</span> <span class='kw'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/c.html'>c</a></span>(<span class='st'>"auto"</span>, <span class='st'>"d_3"</span>, <span class='st'>"direct"</span>, <span class='st'>"twostep"</span>, <span class='st'>"threestep"</span>, +    <span class='st'>"fourstep"</span>, <span class='st'>"IRLS"</span>, <span class='st'>"OLS"</span>),    <span class='kw'>reweight.tol</span> <span class='kw'>=</span> <span class='fl'>1e-8</span>, <span class='kw'>reweight.max.iter</span> <span class='kw'>=</span> <span class='fl'>10</span>,    <span class='kw'>trace_parms</span> <span class='kw'>=</span> <span class='fl'>FALSE</span>, <span class='no'>...</span>)</pre> @@ -330,11 +330,12 @@ Per default, parameters in the kinetic models are internally transformed in      </tr>      <tr>        <th>error_model_algorithm</th> -      <td><p>If the error model is "const", the error model algorithm is ignored, -    because no special algorithm is needed and unweighted (also known as -    ordinary) least squares fitting (listed as "OLS" in the summary) can be -    applied.</p> -<p>The default algorithm "d_3" will directly minimize the negative +      <td><p>If "auto", the selected algorithm depends on the error model.  +    If the error model is "const", nonlinear least squares fitting ("OLS") is +    selected. If the error model is "obs", iteratively reweighted least squares +    fitting ("IRLS") is selected. If the error model is "tc", the "d_3"  +    algorithm is selected.</p> +<p>The algorithm "d_3" will directly minimize the negative      log-likelihood and - independently - also use the three step algorithm      described below. The fit with the higher likelihood is returned.</p>  <p>The algorithm "direct" will directly minimize the negative @@ -354,9 +355,7 @@ Per default, parameters in the kinetic models are internally transformed in      unweighted least squares, and then iterates optimization of the error model      parameters and subsequent      optimization of the degradation model using those error model parameters, -    until the error model parameters converge.</p> -<p>The algorithm "OLS" (Ordinary Least Squares) is automatically selected when -    the error model is "const" and results in an unweighted least squares fit.</p></td> +    until the error model parameters converge.</p></td>      </tr>      <tr>        <th>reweight.tol</th> @@ -408,33 +407,31 @@ Per default, parameters in the kinetic models are internally transformed in  <span class='no'>fit</span> <span class='kw'><-</span> <span class='fu'>mkinfit</span>(<span class='st'>"FOMC"</span>, <span class='no'>FOCUS_2006_C</span>, <span class='kw'>quiet</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>)  <span class='fu'><a href='https://rdrr.io/r/base/summary.html'>summary</a></span>(<span class='no'>fit</span>)</div><div class='output co'>#> mkin version used for fitting:    0.9.49.6   #> R version used for fitting:       3.6.1  -#> Date of fit:     Thu Sep 19 09:50:54 2019  -#> Date of summary: Thu Sep 19 09:50:54 2019  +#> Date of fit:     Mon Oct 21 12:07:39 2019  +#> Date of summary: Mon Oct 21 12:07:39 2019   #>   #> Equations:  #> d_parent/dt = - (alpha/beta) * 1/((time/beta) + 1) * parent  #>   #> Model predictions using solution type analytical   #>  -#> Fitted using 222 model solutions performed in 0.458 s +#> Fitted using 222 model solutions performed in 0.459 s  #>   #> Error model: Constant variance   #>   #> Error model algorithm: OLS   #>   #> Starting values for parameters to be optimised: -#>              value   type -#> parent_0 85.100000  state -#> alpha     1.000000 deparm -#> beta     10.000000 deparm -#> sigma     1.857444  error +#>          value   type +#> parent_0  85.1  state +#> alpha      1.0 deparm +#> beta      10.0 deparm  #>   #> Starting values for the transformed parameters actually optimised:  #>               value lower upper  #> parent_0  85.100000  -Inf   Inf  #> log_alpha  0.000000  -Inf   Inf  #> log_beta   2.302585  -Inf   Inf -#> sigma      1.857444     0   Inf  #>   #> Fixed parameter values:  #> None @@ -490,7 +487,7 @@ Per default, parameters in the kinetic models are internally transformed in    <span class='kw'>m1</span> <span class='kw'>=</span> <span class='fu'><a href='mkinsub.html'>mkinsub</a></span>(<span class='st'>"SFO"</span>))</div><div class='output co'>#> <span class='message'>Successfully compiled differential equation model from auto-generated C code.</span></div><div class='input'><span class='co'># Fit the model to the FOCUS example dataset D using defaults</span>  <span class='fu'><a href='https://rdrr.io/r/base/print.html'>print</a></span>(<span class='fu'><a href='https://rdrr.io/r/base/system.time.html'>system.time</a></span>(<span class='no'>fit</span> <span class='kw'><-</span> <span class='fu'>mkinfit</span>(<span class='no'>SFO_SFO</span>, <span class='no'>FOCUS_2006_D</span>,                             <span class='kw'>solution_type</span> <span class='kw'>=</span> <span class='st'>"eigen"</span>, <span class='kw'>quiet</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>)))</div><div class='output co'>#> <span class='warning'>Warning: Observations with value of zero were removed from the data</span></div><div class='output co'>#>        User      System verstrichen  -#>       1.479       0.002       1.482 </div><div class='input'><span class='fu'><a href='https://rdrr.io/r/stats/coef.html'>coef</a></span>(<span class='no'>fit</span>)</div><div class='output co'>#> NULL</div><div class='input'><span class='fu'><a href='endpoints.html'>endpoints</a></span>(<span class='no'>fit</span>)</div><div class='output co'>#> $ff +#>       1.462       0.000       1.463 </div><div class='input'><span class='fu'><a href='https://rdrr.io/r/stats/coef.html'>coef</a></span>(<span class='no'>fit</span>)</div><div class='output co'>#> NULL</div><div class='input'><span class='fu'><a href='endpoints.html'>endpoints</a></span>(<span class='no'>fit</span>)</div><div class='output co'>#> $ff  #> parent_sink   parent_m1     m1_sink   #>    0.485524    0.514476    1.000000   #>  @@ -563,7 +560,7 @@ Per default, parameters in the kinetic models are internally transformed in  #> Sum of squared residuals at call 126: 371.2134  #> Sum of squared residuals at call 135: 371.2134  #> Negative log-likelihood at call 145: 97.22429</div><div class='output co'>#> <span class='message'>Optimisation successfully terminated.</span></div><div class='output co'>#>        User      System verstrichen  -#>       1.053       0.000       1.054 </div><div class='input'><span class='fu'><a href='https://rdrr.io/r/stats/coef.html'>coef</a></span>(<span class='no'>fit.deSolve</span>)</div><div class='output co'>#> NULL</div><div class='input'><span class='fu'><a href='endpoints.html'>endpoints</a></span>(<span class='no'>fit.deSolve</span>)</div><div class='output co'>#> $ff +#>        1.04        0.00        1.04 </div><div class='input'><span class='fu'><a href='https://rdrr.io/r/stats/coef.html'>coef</a></span>(<span class='no'>fit.deSolve</span>)</div><div class='output co'>#> NULL</div><div class='input'><span class='fu'><a href='endpoints.html'>endpoints</a></span>(<span class='no'>fit.deSolve</span>)</div><div class='output co'>#> $ff  #> parent_sink   parent_m1     m1_sink   #>    0.485524    0.514476    1.000000   #>  @@ -599,8 +596,8 @@ Per default, parameters in the kinetic models are internally transformed in  <span class='no'>SFO_SFO.ff</span> <span class='kw'><-</span> <span class='fu'><a href='mkinmod.html'>mkinmod</a></span>(<span class='kw'>parent</span> <span class='kw'>=</span> <span class='fu'><a href='mkinsub.html'>mkinsub</a></span>(<span class='st'>"SFO"</span>, <span class='st'>"m1"</span>),                        <span class='kw'>m1</span> <span class='kw'>=</span> <span class='fu'><a href='mkinsub.html'>mkinsub</a></span>(<span class='st'>"SFO"</span>), <span class='kw'>use_of_ff</span> <span class='kw'>=</span> <span class='st'>"max"</span>)</div><div class='output co'>#> <span class='message'>Successfully compiled differential equation model from auto-generated C code.</span></div><div class='input'><span class='no'>f.noweight</span> <span class='kw'><-</span> <span class='fu'>mkinfit</span>(<span class='no'>SFO_SFO.ff</span>, <span class='no'>FOCUS_2006_D</span>, <span class='kw'>quiet</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>)</div><div class='output co'>#> <span class='warning'>Warning: Observations with value of zero were removed from the data</span></div><div class='input'><span class='fu'><a href='https://rdrr.io/r/base/summary.html'>summary</a></span>(<span class='no'>f.noweight</span>)</div><div class='output co'>#> mkin version used for fitting:    0.9.49.6   #> R version used for fitting:       3.6.1  -#> Date of fit:     Thu Sep 19 09:51:10 2019  -#> Date of summary: Thu Sep 19 09:51:10 2019  +#> Date of fit:     Mon Oct 21 12:07:54 2019  +#> Date of summary: Mon Oct 21 12:07:54 2019   #>   #> Equations:  #> d_parent/dt = - k_parent * parent @@ -608,19 +605,18 @@ Per default, parameters in the kinetic models are internally transformed in  #>   #> Model predictions using solution type deSolve   #>  -#> Fitted using 421 model solutions performed in 1.138 s +#> Fitted using 421 model solutions performed in 1.07 s  #>   #> Error model: Constant variance   #>   #> Error model algorithm: OLS   #>   #> Starting values for parameters to be optimised: -#>                     value   type -#> parent_0       100.750000  state -#> k_parent         0.100000 deparm -#> k_m1             0.100100 deparm -#> f_parent_to_m1   0.500000 deparm -#> sigma            3.125504  error +#>                   value   type +#> parent_0       100.7500  state +#> k_parent         0.1000 deparm +#> k_m1             0.1001 deparm +#> f_parent_to_m1   0.5000 deparm  #>   #> Starting values for the transformed parameters actually optimised:  #>                     value lower upper @@ -628,7 +624,6 @@ Per default, parameters in the kinetic models are internally transformed in  #> log_k_parent    -2.302585  -Inf   Inf  #> log_k_m1        -2.301586  -Inf   Inf  #> f_parent_ilr_1   0.000000  -Inf   Inf -#> sigma            3.125504     0   Inf  #>   #> Fixed parameter values:  #>      value  type @@ -718,8 +713,8 @@ Per default, parameters in the kinetic models are internally transformed in  #>   120       m1    25.15  28.78984 -3.640e+00  #>   120       m1    33.31  28.78984  4.520e+00</div><div class='input'><span class='no'>f.obs</span> <span class='kw'><-</span> <span class='fu'>mkinfit</span>(<span class='no'>SFO_SFO.ff</span>, <span class='no'>FOCUS_2006_D</span>, <span class='kw'>error_model</span> <span class='kw'>=</span> <span class='st'>"obs"</span>, <span class='kw'>quiet</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>)</div><div class='output co'>#> <span class='warning'>Warning: Observations with value of zero were removed from the data</span></div><div class='input'><span class='fu'><a href='https://rdrr.io/r/base/summary.html'>summary</a></span>(<span class='no'>f.obs</span>)</div><div class='output co'>#> mkin version used for fitting:    0.9.49.6   #> R version used for fitting:       3.6.1  -#> Date of fit:     Thu Sep 19 09:51:12 2019  -#> Date of summary: Thu Sep 19 09:51:12 2019  +#> Date of fit:     Mon Oct 21 12:07:56 2019  +#> Date of summary: Mon Oct 21 12:07:56 2019   #>   #> Equations:  #> d_parent/dt = - k_parent * parent @@ -727,21 +722,20 @@ Per default, parameters in the kinetic models are internally transformed in  #>   #> Model predictions using solution type deSolve   #>  -#> Fitted using 979 model solutions performed in 2.565 s +#> Fitted using 897 model solutions performed in 2.363 s  #>   #> Error model: Variance unique to each observed variable   #>  -#> Error model algorithm: d_3  -#> Direct fitting and three-step fitting yield approximately the same likelihood  +#> Error model algorithm: IRLS   #>   #> Starting values for parameters to be optimised: -#>                     value   type -#> parent_0       100.750000  state -#> k_parent         0.100000 deparm -#> k_m1             0.100100 deparm -#> f_parent_to_m1   0.500000 deparm -#> sigma_parent     3.398909  error -#> sigma_m1         2.857157  error +#>                   value   type +#> parent_0       100.7500  state +#> k_parent         0.1000 deparm +#> k_m1             0.1001 deparm +#> f_parent_to_m1   0.5000 deparm +#> sigma_parent     3.0000  error +#> sigma_m1         3.0000  error  #>   #> Starting values for the transformed parameters actually optimised:  #>                     value lower upper @@ -749,8 +743,8 @@ Per default, parameters in the kinetic models are internally transformed in  #> log_k_parent    -2.302585  -Inf   Inf  #> log_k_m1        -2.301586  -Inf   Inf  #> f_parent_ilr_1   0.000000  -Inf   Inf -#> sigma_parent     3.398909     0   Inf -#> sigma_m1         2.857157     0   Inf +#> sigma_parent     3.000000     0   Inf +#> sigma_m1         3.000000     0   Inf  #>   #> Fixed parameter values:  #>      value  type @@ -767,16 +761,16 @@ Per default, parameters in the kinetic models are internally transformed in  #>   #> Parameter correlation:  #>                parent_0 log_k_parent log_k_m1 f_parent_ilr_1 sigma_parent -#> parent_0        1.00000      0.51078 -0.19133       -0.59997     0.035670 -#> log_k_parent    0.51078      1.00000 -0.37458       -0.59239     0.069833 -#> log_k_m1       -0.19133     -0.37458  1.00000        0.74398    -0.026158 -#> f_parent_ilr_1 -0.59997     -0.59239  0.74398        1.00000    -0.041369 -#> sigma_parent    0.03567      0.06983 -0.02616       -0.04137     1.000000 -#> sigma_m1       -0.03385     -0.06627  0.02482        0.03926    -0.004628 +#> parent_0        1.00000      0.51078 -0.19132       -0.59997     0.035676 +#> log_k_parent    0.51078      1.00000 -0.37457       -0.59239     0.069834 +#> log_k_m1       -0.19132     -0.37457  1.00000        0.74398    -0.026158 +#> f_parent_ilr_1 -0.59997     -0.59239  0.74398        1.00000    -0.041371 +#> sigma_parent    0.03568      0.06983 -0.02616       -0.04137     1.000000 +#> sigma_m1       -0.03385     -0.06626  0.02482        0.03926    -0.004628  #>                 sigma_m1  #> parent_0       -0.033847  #> log_k_parent   -0.066265 -#> log_k_m1        0.024823 +#> log_k_m1        0.024824  #> f_parent_ilr_1  0.039256  #> sigma_parent   -0.004628  #> sigma_m1        1.000000 @@ -790,7 +784,7 @@ Per default, parameters in the kinetic models are internally transformed in  #> k_parent        0.098970  22.850 1.099e-21  0.090530 1.082e-01  #> k_m1            0.005245   8.046 1.732e-09  0.004072 6.756e-03  #> f_parent_to_m1  0.513600  23.560 4.352e-22  0.469300 5.578e-01 -#> sigma_parent    3.401000   5.985 5.662e-07  2.244000 4.559e+00 +#> sigma_parent    3.401000   5.985 5.661e-07  2.244000 4.559e+00  #> sigma_m1        2.855000   6.311 2.215e-07  1.934000 3.777e+00  #>   #> FOCUS Chi2 error levels in percent: @@ -811,47 +805,47 @@ Per default, parameters in the kinetic models are internally transformed in  #>   #> Data:  #>  time variable observed predicted   residual -#>     0   parent    99.46  99.65417 -1.942e-01 -#>     0   parent   102.04  99.65417  2.386e+00 -#>     1   parent    93.50  90.26332  3.237e+00 -#>     1   parent    92.50  90.26332  2.237e+00 -#>     3   parent    63.23  74.05306 -1.082e+01 -#>     3   parent    68.99  74.05306 -5.063e+00 -#>     7   parent    52.32  49.84325  2.477e+00 -#>     7   parent    55.13  49.84325  5.287e+00 +#>     0   parent    99.46  99.65420 -1.942e-01 +#>     0   parent   102.04  99.65420  2.386e+00 +#>     1   parent    93.50  90.26335  3.237e+00 +#>     1   parent    92.50  90.26335  2.237e+00 +#>     3   parent    63.23  74.05308 -1.082e+01 +#>     3   parent    68.99  74.05308 -5.063e+00 +#>     7   parent    52.32  49.84326  2.477e+00 +#>     7   parent    55.13  49.84326  5.287e+00  #>    14   parent    27.27  24.92971  2.340e+00  #>    14   parent    26.64  24.92971  1.710e+00  #>    21   parent    11.50  12.46890 -9.689e-01  #>    21   parent    11.64  12.46890 -8.289e-01  #>    35   parent     2.85   3.11925 -2.692e-01  #>    35   parent     2.91   3.11925 -2.092e-01 -#>    50   parent     0.69   0.70679 -1.679e-02 -#>    50   parent     0.63   0.70679 -7.679e-02 +#>    50   parent     0.69   0.70678 -1.678e-02 +#>    50   parent     0.63   0.70678 -7.678e-02  #>    75   parent     0.05   0.05952 -9.523e-03  #>    75   parent     0.06   0.05952  4.772e-04  #>     1       m1     4.84   4.81075  2.925e-02  #>     1       m1     5.64   4.81075  8.292e-01 -#>     3       m1    12.91  13.04196 -1.320e-01 -#>     3       m1    12.96  13.04196 -8.196e-02 -#>     7       m1    22.97  25.06847 -2.098e+00 -#>     7       m1    24.47  25.06847 -5.985e-01 +#>     3       m1    12.91  13.04197 -1.320e-01 +#>     3       m1    12.96  13.04197 -8.197e-02 +#>     7       m1    22.97  25.06848 -2.098e+00 +#>     7       m1    24.47  25.06848 -5.985e-01  #>    14       m1    41.69  36.70308  4.987e+00  #>    14       m1    33.21  36.70308 -3.493e+00  #>    21       m1    44.37  41.65115  2.719e+00  #>    21       m1    46.44  41.65115  4.789e+00  #>    35       m1    41.22  43.29465 -2.075e+00  #>    35       m1    37.95  43.29465 -5.345e+00 -#>    50       m1    41.19  41.19948 -9.479e-03 +#>    50       m1    41.19  41.19948 -9.477e-03  #>    50       m1    40.01  41.19948 -1.189e+00  #>    75       m1    40.09  36.44035  3.650e+00  #>    75       m1    33.85  36.44035 -2.590e+00 -#>   100       m1    31.04  31.98773 -9.477e-01 -#>   100       m1    33.13  31.98773  1.142e+00 -#>   120       m1    25.15  28.80429 -3.654e+00 -#>   120       m1    33.31  28.80429  4.506e+00</div><div class='input'><span class='no'>f.tc</span> <span class='kw'><-</span> <span class='fu'>mkinfit</span>(<span class='no'>SFO_SFO.ff</span>, <span class='no'>FOCUS_2006_D</span>, <span class='kw'>error_model</span> <span class='kw'>=</span> <span class='st'>"tc"</span>, <span class='kw'>quiet</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>)</div><div class='output co'>#> <span class='warning'>Warning: Observations with value of zero were removed from the data</span></div><div class='input'><span class='fu'><a href='https://rdrr.io/r/base/summary.html'>summary</a></span>(<span class='no'>f.tc</span>)</div><div class='output co'>#> mkin version used for fitting:    0.9.49.6  +#>   100       m1    31.04  31.98772 -9.477e-01 +#>   100       m1    33.13  31.98772  1.142e+00 +#>   120       m1    25.15  28.80428 -3.654e+00 +#>   120       m1    33.31  28.80428  4.506e+00</div><div class='input'><span class='no'>f.tc</span> <span class='kw'><-</span> <span class='fu'>mkinfit</span>(<span class='no'>SFO_SFO.ff</span>, <span class='no'>FOCUS_2006_D</span>, <span class='kw'>error_model</span> <span class='kw'>=</span> <span class='st'>"tc"</span>, <span class='kw'>quiet</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>)</div><div class='output co'>#> <span class='warning'>Warning: Observations with value of zero were removed from the data</span></div><div class='input'><span class='fu'><a href='https://rdrr.io/r/base/summary.html'>summary</a></span>(<span class='no'>f.tc</span>)</div><div class='output co'>#> mkin version used for fitting:    0.9.49.6   #> R version used for fitting:       3.6.1  -#> Date of fit:     Thu Sep 19 09:51:22 2019  -#> Date of summary: Thu Sep 19 09:51:22 2019  +#> Date of fit:     Mon Oct 21 12:08:06 2019  +#> Date of summary: Mon Oct 21 12:08:06 2019   #>   #> Equations:  #> d_parent/dt = - k_parent * parent @@ -859,7 +853,7 @@ Per default, parameters in the kinetic models are internally transformed in  #>   #> Model predictions using solution type deSolve   #>  -#> Fitted using 2289 model solutions performed in 9.24 s +#> Fitted using 2289 model solutions performed in 9.175 s  #>   #> Error model: Two-component variance function   #>  @@ -867,22 +861,22 @@ Per default, parameters in the kinetic models are internally transformed in  #> Direct fitting and three-step fitting yield approximately the same likelihood   #>   #> Starting values for parameters to be optimised: -#>                       value   type -#> parent_0       1.007500e+02  state -#> k_parent       1.000000e-01 deparm -#> k_m1           1.001000e-01 deparm -#> f_parent_to_m1 5.000000e-01 deparm -#> sigma_low      5.641148e-03  error -#> rsd_high       8.430766e-02  error +#>                   value   type +#> parent_0       100.7500  state +#> k_parent         0.1000 deparm +#> k_m1             0.1001 deparm +#> f_parent_to_m1   0.5000 deparm +#> sigma_low        0.1000  error +#> rsd_high         0.1000  error  #>   #> Starting values for the transformed parameters actually optimised: -#>                        value lower upper -#> parent_0       100.750000000  -Inf   Inf -#> log_k_parent    -2.302585093  -Inf   Inf -#> log_k_m1        -2.301585593  -Inf   Inf -#> f_parent_ilr_1   0.000000000  -Inf   Inf -#> sigma_low        0.005641148     0   Inf -#> rsd_high         0.084307660     0   Inf +#>                     value lower upper +#> parent_0       100.750000  -Inf   Inf +#> log_k_parent    -2.302585  -Inf   Inf +#> log_k_m1        -2.301586  -Inf   Inf +#> f_parent_ilr_1   0.000000  -Inf   Inf +#> sigma_low        0.100000     0   Inf +#> rsd_high         0.100000     0   Inf  #>   #> Fixed parameter values:  #>      value  type diff --git a/man/mkinfit.Rd b/man/mkinfit.Rd index f7dd7009..85b742e8 100644 --- a/man/mkinfit.Rd +++ b/man/mkinfit.Rd @@ -31,8 +31,8 @@ mkinfit(mkinmod, observed,    quiet = FALSE,    atol = 1e-8, rtol = 1e-10, n.outtimes = 100,    error_model = c("const", "obs", "tc"), -  error_model_algorithm = c("d_3", "direct", "twostep", "threestep", "fourstep", "IRLS",  -                            "OLS"), +  error_model_algorithm = c("auto", "d_3", "direct", "twostep", "threestep", +    "fourstep", "IRLS", "OLS"),    reweight.tol = 1e-8, reweight.max.iter = 10,    trace_parms = FALSE, ...)  } @@ -175,12 +175,13 @@ mkinfit(mkinmod, observed,      distribution as assumed by this method.    }    \item{error_model_algorithm}{ -    If the error model is "const", the error model algorithm is ignored, -    because no special algorithm is needed and unweighted (also known as -    ordinary) least squares fitting (listed as "OLS" in the summary) can be -    applied. +    If "auto", the selected algorithm depends on the error model.  +    If the error model is "const", nonlinear least squares fitting ("OLS") is +    selected. If the error model is "obs", iteratively reweighted least squares +    fitting ("IRLS") is selected. If the error model is "tc", the "d_3"  +    algorithm is selected. -    The default algorithm "d_3" will directly minimize the negative +    The algorithm "d_3" will directly minimize the negative      log-likelihood and - independently - also use the three step algorithm      described below. The fit with the higher likelihood is returned. @@ -206,9 +207,6 @@ mkinfit(mkinmod, observed,      parameters and subsequent      optimization of the degradation model using those error model parameters,      until the error model parameters converge. - -    The algorithm "OLS" (Ordinary Least Squares) is automatically selected when -    the error model is "const" and results in an unweighted least squares fit.    }    \item{reweight.tol}{      Tolerance for the convergence criterion calculated from the error model @@ -1,30 +1,30 @@  Loading mkin  Testing mkin  ✔ |  OK F W S | Context -
⠏ |   0       | Export dataset for reading into CAKE
✔ |   1       | Export dataset for reading into CAKE -
⠏ |   0       | Error model fitting
⠋ |   1       | Error model fitting
⠹ |   3       | Error model fitting
⠸ |   4       | Error model fitting
⠼ |   5       | Error model fitting
⠴ |   6       | Error model fitting
⠧ |   8       | Error model fitting
⠏ |  10       | Error model fitting
⠋ |  10   1   | Error model fitting
⠋ |  10   1   | Error model fitting
⠋ |  10   1   | Error model fitting
⠋ |  10   1   | Error model fitting
⠋ |  10   1   | Error model fitting
⠋ |  10   1   | Error model fitting
⠋ |  10   1   | Error model fitting
⠋ |  10   1   | Error model fitting
⠋ |  10   1   | Error model fitting
⠋ |  10   1   | Error model fitting
⠋ |  10   1   | Error model fitting
⠋ |  11       | Error model fitting
⠹ |  13       | Error model fitting
⠸ |  14       | Error model fitting
⠼ |  15       | Error model fitting
⠴ |  16       | Error model fitting
⠦ |  17       | Error model fitting
⠧ |  18       | Error model fitting
⠇ |  19       | Error model fitting
⠏ |  20       | Error model fitting
✔ |  20       | Error model fitting [413.8 s] -
⠏ |   0       | Calculation of FOCUS chi2 error levels
⠋ |   1       | Calculation of FOCUS chi2 error levels
⠹ |   3       | Calculation of FOCUS chi2 error levels
⠼ |   5       | Calculation of FOCUS chi2 error levels
✔ |   5       | Calculation of FOCUS chi2 error levels [3.5 s] -
⠏ |   0       | Results for FOCUS D established in expertise for UBA (Ranke 2014)
⠋ |   1       | Results for FOCUS D established in expertise for UBA (Ranke 2014)
⠙ |   2       | Results for FOCUS D established in expertise for UBA (Ranke 2014)
⠸ |   4       | Results for FOCUS D established in expertise for UBA (Ranke 2014)
⠇ |   9       | Results for FOCUS D established in expertise for UBA (Ranke 2014)
✔ |  13       | Results for FOCUS D established in expertise for UBA (Ranke 2014) [3.7 s] -
⠏ |   0       | Test fitting the decline of metabolites from their maximum
⠋ |   1       | Test fitting the decline of metabolites from their maximum
⠹ |   3       | Test fitting the decline of metabolites from their maximum
⠼ |   5       | Test fitting the decline of metabolites from their maximum
✔ |   6       | Test fitting the decline of metabolites from their maximum [0.9 s] -
⠏ |   0       | Fitting the logistic model
⠋ |   1       | Fitting the logistic model
✔ |   1       | Fitting the logistic model [0.9 s] -
⠏ |   0       | Test dataset class mkinds used in gmkin
✔ |   1       | Test dataset class mkinds used in gmkin -
⠏ |   0       | Special cases of mkinfit calls
⠋ |   1       | Special cases of mkinfit calls
⠇ |   9       | Special cases of mkinfit calls
⠏ |  10       | Special cases of mkinfit calls
⠋ |  11       | Special cases of mkinfit calls
⠙ |  12       | Special cases of mkinfit calls
✔ |  12       | Special cases of mkinfit calls [2.7 s] -
⠏ |   0       | mkinmod model generation and printing
⠇ |   9       | mkinmod model generation and printing
✔ |   9       | mkinmod model generation and printing [0.2 s] -
⠏ |   0       | Model predictions with mkinpredict
⠋ |   1       | Model predictions with mkinpredict
✔ |   3       | Model predictions with mkinpredict [0.3 s] -
⠏ |   0       | Evaluations according to 2015 NAFTA guidance
⠙ |   2       | Evaluations according to 2015 NAFTA guidance
⠇ |   9       | Evaluations according to 2015 NAFTA guidance
⠏ |  10       | Evaluations according to 2015 NAFTA guidance
⠴ |  16       | Evaluations according to 2015 NAFTA guidance
✔ |  16       | Evaluations according to 2015 NAFTA guidance [4.0 s] -
⠏ |   0       | Fitting of parent only models
⠋ |   1       | Fitting of parent only models
⠙ |   2       | Fitting of parent only models
⠹ |   3       | Fitting of parent only models
⠸ |   4       | Fitting of parent only models
⠼ |   5       | Fitting of parent only models
⠴ |   6       | Fitting of parent only models
⠦ |   7       | Fitting of parent only models
⠧ |   8       | Fitting of parent only models
⠇ |   9       | Fitting of parent only models
⠏ |  10       | Fitting of parent only models
⠋ |  11       | Fitting of parent only models
⠙ |  12       | Fitting of parent only models
⠹ |  13       | Fitting of parent only models
⠴ |  16       | Fitting of parent only models
⠧ |  18       | Fitting of parent only models
⠏ |  20       | Fitting of parent only models
✔ |  21       | Fitting of parent only models [40.6 s] -
⠏ |   0       | Calculation of maximum time weighted average concentrations (TWAs)
⠋ |   1       | Calculation of maximum time weighted average concentrations (TWAs)
⠙ |   2       | Calculation of maximum time weighted average concentrations (TWAs)
⠹ |   3       | Calculation of maximum time weighted average concentrations (TWAs)
⠸ |   4       | Calculation of maximum time weighted average concentrations (TWAs)
✔ |   4       | Calculation of maximum time weighted average concentrations (TWAs) [2.2 s] -
⠏ |   0       | Summary
✔ |   1       | Summary -
⠏ |   0       | Plotting
⠹ |   3       | Plotting
✔ |   4       | Plotting [0.3 s] -
⠏ |   0       | AIC calculation
✔ |   2       | AIC calculation -
⠏ |   0       | Complex test case from Schaefer et al. (2007) Piacenza paper
⠋ |   1       | Complex test case from Schaefer et al. (2007) Piacenza paper
✔ |   2       | Complex test case from Schaefer et al. (2007) Piacenza paper [5.2 s] -
⠏ |   0       | Summaries of old mkinfit objects
✔ |   1       | Summaries of old mkinfit objects -
⠏ |   0       | Results for synthetic data established in expertise for UBA (Ranke 2014)
⠋ |   1       | Results for synthetic data established in expertise for UBA (Ranke 2014)
⠹ |   3       | Results for synthetic data established in expertise for UBA (Ranke 2014)
✔ |   4       | Results for synthetic data established in expertise for UBA (Ranke 2014) [7.2 s] +✔ |   1       | Export dataset for reading into CAKE +✔ |   2       | Confidence intervals and p-values [5.6 s] +✔ |  11       | Error model fitting [53.4 s] +✔ |   5       | Calculation of FOCUS chi2 error levels [3.5 s] +✔ |  13       | Results for FOCUS D established in expertise for UBA (Ranke 2014) [3.7 s] +✔ |   6       | Test fitting the decline of metabolites from their maximum [0.9 s] +✔ |   1       | Fitting the logistic model [0.9 s] +✔ |   1       | Test dataset class mkinds used in gmkin +✔ |  12       | Special cases of mkinfit calls [2.7 s] +✔ |   9       | mkinmod model generation and printing [0.2 s] +✔ |   3       | Model predictions with mkinpredict [0.3 s] +✔ |  16       | Evaluations according to 2015 NAFTA guidance [4.0 s] +✔ |   4       | Calculation of maximum time weighted average concentrations (TWAs) [2.2 s] +✔ |   1       | Summary +✔ |   4       | Plotting [0.3 s] +✔ |   2       | AIC calculation +✔ |   2       | Complex test case from Schaefer et al. (2007) Piacenza paper [5.2 s] +✔ |   1       | Summaries of old mkinfit objects +✔ |   4       | Results for synthetic data established in expertise for UBA (Ranke 2014) [7.1 s]  ══ Results ═════════════════════════════════════════════════════════════════════ -Duration: 491.0 s +Duration: 95.5 s -OK:       126 +OK:       98  Failed:   0  Warnings: 0  Skipped:  0 diff --git a/tests/testthat/DFOP_FOCUS_C_messages.txt b/tests/testthat/DFOP_FOCUS_C_messages.txt index d3d7688b..78438d06 100644 --- a/tests/testthat/DFOP_FOCUS_C_messages.txt +++ b/tests/testthat/DFOP_FOCUS_C_messages.txt @@ -1,4 +1,4 @@ -parent_0 log_k1 log_k2 g_ilr sigma  +parent_0 log_k1 log_k2 g_ilr   85.1 -2.302585 -4.60517 0   Sum of squared residuals at call 1: 7391.39  85.1 -2.302585 -4.60517 0  diff --git a/tests/testthat/FOCUS_2006_D.csf b/tests/testthat/FOCUS_2006_D.csf index f9233770..171abbb0 100644 --- a/tests/testthat/FOCUS_2006_D.csf +++ b/tests/testthat/FOCUS_2006_D.csf @@ -5,7 +5,7 @@ Description:  MeasurementUnits: % AR  TimeUnits: days  Comments: Created using mkin::CAKE_export -Date: 2019-07-08 +Date: 2019-10-21  Optimiser: IRLS  [Data] diff --git a/tests/testthat/test_parent_only.R b/tests/testthat/slow/test_parent_only.R index 7521e145..7521e145 100644 --- a/tests/testthat/test_parent_only.R +++ b/tests/testthat/slow/test_parent_only.R diff --git a/tests/testthat/slow/test_roundtrip_error_parameters.R b/tests/testthat/slow/test_roundtrip_error_parameters.R new file mode 100644 index 00000000..97510563 --- /dev/null +++ b/tests/testthat/slow/test_roundtrip_error_parameters.R @@ -0,0 +1,141 @@ +test_that("Reweighting method 'tc' produces reasonable variance estimates", { + +  # Check if we can approximately obtain the parameters and the error model +  # components that were used in the data generation + +  # Parent only +  DFOP <- mkinmod(parent = mkinsub("DFOP")) +  sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) +  parms_DFOP <- c(k1 = 0.2, k2 = 0.02, g = 0.5) +  parms_DFOP_optim <- c(parent_0 = 100, parms_DFOP) + +  d_DFOP <- mkinpredict(DFOP, +     parms_DFOP, c(parent = 100), +     sampling_times) +  d_2_10 <- add_err(d_DFOP, +    sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), +    n = 10, reps = 2, digits = 5, LOD = -Inf, seed = 123456) +  d_100_1 <- add_err(d_DFOP, +    sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), +    n = 1, reps = 100, digits = 5, LOD = -Inf, seed = 123456) + +  # Per default (on my box where I set NOT_CRAN) use all cores minus one +  if (identical(Sys.getenv("NOT_CRAN"), "true")) { +    n_cores <- parallel::detectCores() - 1 +  } else { +    n_cores <- 1 +  } + +  # We are only allowed one core on travis, but they also set NOT_CRAN=true +  if (Sys.getenv("TRAVIS") != "") n_cores = 1 + +  # On Windows we would need to make a cluster first +  if (Sys.info()["sysname"] == "Windows") n_cores = 1 + +  # Unweighted fits +  f_2_10 <- mmkin("DFOP", d_2_10, error_model = "const", quiet = TRUE, +    cores = n_cores) +  parms_2_10 <- apply(sapply(f_2_10, function(x) x$bparms.optim), 1, mean) +  parm_errors_2_10 <- (parms_2_10 - parms_DFOP_optim) / parms_DFOP_optim +  expect_true(all(abs(parm_errors_2_10) < 0.12)) + +  f_2_10_tc <- mmkin("DFOP", d_2_10, error_model = "tc", quiet = TRUE, +    cores = n_cores) +  parms_2_10_tc <- apply(sapply(f_2_10_tc, function(x) x$bparms.optim), 1, mean) +  parm_errors_2_10_tc <- (parms_2_10_tc - parms_DFOP_optim) / parms_DFOP_optim +  expect_true(all(abs(parm_errors_2_10_tc) < 0.05)) + +  tcf_2_10_tc <- apply(sapply(f_2_10_tc, function(x) x$errparms), 1, mean, na.rm = TRUE) + +  tcf_2_10_error_model_errors <- (tcf_2_10_tc - c(0.5, 0.07)) / c(0.5, 0.07) +  expect_true(all(abs(tcf_2_10_error_model_errors) < 0.2)) + +  # When we have 100 replicates in the synthetic data, we can roundtrip +  # the parameters with < 2% precision +  f_tc_100_1 <- mkinfit(DFOP, d_100_1[[1]], error_model = "tc", quiet = TRUE) +  parm_errors_100_1 <- (f_tc_100_1$bparms.optim - parms_DFOP_optim) / parms_DFOP_optim +  expect_true(all(abs(parm_errors_100_1) < 0.02)) + +  tcf_100_1_error_model_errors <- (f_tc_100_1$errparms - c(0.5, 0.07)) / +    c(0.5, 0.07) +  # We also get a precision of < 2% for the error model components +  expect_true(all(abs(tcf_100_1_error_model_errors) < 0.02)) + +  # Parent and two metabolites +  m_synth_DFOP_lin <- mkinmod(parent = list(type = "DFOP", to = "M1"), +                             M1 = list(type = "SFO", to = "M2"), +                             M2 = list(type = "SFO"), use_of_ff = "max", +                             quiet = TRUE) +  sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) +  parms_DFOP_lin <- c(k1 = 0.2, k2 = 0.02, g = 0.5, +     f_parent_to_M1 = 0.5, k_M1 = 0.3, +     f_M1_to_M2 = 0.7, k_M2 = 0.02) +  d_synth_DFOP_lin <- mkinpredict(m_synth_DFOP_lin, +     parms_DFOP_lin, +     c(parent = 100, M1 = 0, M2 = 0), +     sampling_times) +  parms_DFOP_lin_optim = c(parent_0 = 100, parms_DFOP_lin) + +  d_met_2_15 <- add_err(d_synth_DFOP_lin, +    sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), +    n = 15, reps = 100, digits = 5, LOD = 0.01, seed = 123456) + +  # For a single fit, we get a relative error of less than 5% in the error +  # model components +  f_met_2_tc_e4 <- mkinfit(m_synth_DFOP_lin, d_met_2_15[[1]], quiet = TRUE, +    error_model = "tc", error_model_algorithm = "direct") +  parm_errors_met_2_tc_e4 <- (f_met_2_tc_e4$errparms - c(0.5, 0.07)) / c(0.5, 0.07) +  expect_true(all(abs(parm_errors_met_2_tc_e4) < 0.05)) + +  # Doing more takes a lot of computing power +  skip_on_travis() +  skip_on_cran() +  f_met_2_15_tc_e4 <- mmkin(list(m_synth_DFOP_lin), d_met_2_15, quiet = TRUE, +                            error_model = "tc", cores = n_cores) + +  parms_met_2_15_tc_e4 <- apply(sapply(f_met_2_15_tc_e4, function(x) x$bparms.optim), 1, mean) +  parm_errors_met_2_15_tc_e4 <- (parms_met_2_15_tc_e4[names(parms_DFOP_lin_optim)] - +                                 parms_DFOP_lin_optim) / parms_DFOP_lin_optim +  expect_true(all(abs(parm_errors_met_2_15_tc_e4) < 0.015)) + +  tcf_met_2_15_tc <- apply(sapply(f_met_2_15_tc_e4, function(x) x$errparms), 1, mean, na.rm = TRUE) + +  tcf_met_2_15_tc_error_model_errors <- (tcf_met_2_15_tc - c(0.5, 0.07)) / +    c(0.5, 0.07) + +  # Here we get a precision < 10% for retrieving the original error model components +  # from 15 datasets +  expect_true(all(abs(tcf_met_2_15_tc_error_model_errors) < 0.10)) +}) + +test_that("The different error model fitting methods work for parent fits", { +  skip_on_cran() + +  f_9_OLS <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, +                     quiet = TRUE) +  expect_equivalent(round(AIC(f_9_OLS), 2), 137.43) + +  f_9_direct <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, +    error_model = "tc", error_model_algorithm = "direct", quiet = TRUE) +  expect_equivalent(round(AIC(f_9_direct), 2), 134.94) + +  f_9_twostep <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, +    error_model = "tc", error_model_algorithm = "twostep", quiet = TRUE) +  expect_equivalent(round(AIC(f_9_twostep), 2), 134.94) + +  f_9_threestep <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, +    error_model = "tc", error_model_algorithm = "threestep", quiet = TRUE) +  expect_equivalent(round(AIC(f_9_threestep), 2), 139.43) + +  f_9_fourstep <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, +    error_model = "tc", error_model_algorithm = "fourstep", quiet = TRUE) +  expect_equivalent(round(AIC(f_9_fourstep), 2), 139.43) + +  f_9_IRLS <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, +    error_model = "tc", error_model_algorithm = "IRLS", quiet = TRUE) +  expect_equivalent(round(AIC(f_9_IRLS), 2), 139.43) + +  f_9_d_3 <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, +    error_model = "tc", error_model_algorithm = "d_3", quiet = TRUE) +  expect_equivalent(round(AIC(f_9_d_3), 2), 134.94) +}) diff --git a/tests/testthat/summary_DFOP_FOCUS_C.txt b/tests/testthat/summary_DFOP_FOCUS_C.txt index b1afeff6..90ce82e2 100644 --- a/tests/testthat/summary_DFOP_FOCUS_C.txt +++ b/tests/testthat/summary_DFOP_FOCUS_C.txt @@ -17,12 +17,11 @@ Error model: Constant variance  Error model algorithm: OLS   Starting values for parameters to be optimised: -             value   type -parent_0 85.100000  state -k1        0.100000 deparm -k2        0.010000 deparm -g         0.500000 deparm -sigma     0.696237  error +         value   type +parent_0 85.10  state +k1        0.10 deparm +k2        0.01 deparm +g         0.50 deparm  Starting values for the transformed parameters actually optimised:               value lower upper @@ -30,7 +29,6 @@ parent_0 85.100000  -Inf   Inf  log_k1   -2.302585  -Inf   Inf  log_k2   -4.605170  -Inf   Inf  g_ilr     0.000000  -Inf   Inf -sigma     0.696237     0   Inf  Fixed parameter values:  None diff --git a/tests/testthat/test_confidence.R b/tests/testthat/test_confidence.R new file mode 100644 index 00000000..e5cc1954 --- /dev/null +++ b/tests/testthat/test_confidence.R @@ -0,0 +1,51 @@ +# Copyright (C) 2019 Johannes Ranke +# Contact: jranke@uni-bremen.de + +# This file is part of the R package mkin + +# mkin is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. + +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more +# details. + +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <http://www.gnu.org/licenses/> + +context("Confidence intervals and p-values") + +m_synth_SFO_lin <- mkinmod( +  parent = mkinsub("SFO", "M1"), +  M1 = mkinsub("SFO", "M2"), +  M2 = mkinsub("SFO"), +  use_of_ff = "max", quiet = TRUE) + +SFO_lin_a <- synthetic_data_for_UBA_2014[[1]]$data + +test_that("Confidence intervals are stable", { +  f_1_mkin_OLS <- mkinfit(m_synth_SFO_lin, SFO_lin_a, quiet = TRUE) +  f_1_mkin_ML <- mkinfit(m_synth_SFO_lin, SFO_lin_a, quiet = TRUE, +    error_model = "const", error_model_algorithm = "direct") + +  bpar_1 <- summary(f_1_mkin_ML)$bpar[, c("Estimate", "Lower", "Upper")] +  # The reference used here is mkin 0.9.48.1 +  bpar_1_mkin_0.9 <-   read.table(text = +"parent_0       102.0000 98.6000 106.0000 +k_parent         0.7390  0.6770   0.8070 +k_M1             0.2990  0.2560   0.3490 +k_M2             0.0202  0.0176   0.0233 +f_parent_to_M1   0.7690  0.6640   0.8480 +f_M1_to_M2       0.7230  0.6030   0.8180", +col.names = c("parameter", "estimate", "lower", "upper")) + +  expect_equivalent(signif(bpar_1[1:6, "Estimate"], 3), bpar_1_mkin_0.9$estimate) + +  # Relative difference of lower bound of the confidence interval is < 0.02 +  expect_equivalent(bpar_1[1:6, "Lower"], bpar_1_mkin_0.9$lower, +      scale = bpar_1_mkin_0.9$lower, tolerance = 0.02) +  }) + diff --git a/tests/testthat/test_error_models.R b/tests/testthat/test_error_models.R index fbae6286..f4015e00 100644 --- a/tests/testthat/test_error_models.R +++ b/tests/testthat/test_error_models.R @@ -35,25 +35,18 @@ DFOP_par_c <- synthetic_data_for_UBA_2014[[12]]$data  test_that("Error model 'const' works", {    skip_on_cran()    fit_const_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "const", quiet = TRUE) -  bpar_1 <- summary(fit_const_1)$bpar[, c("Estimate", "Lower", "Upper")] +  bpar_1 <- fit_const_1$bparms.optim    # The reference used here is mkin 0.9.48.1    bpar_1_mkin_0.9 <-   read.table(text = -"parent_0       102.0000 98.6000 106.0000 -k_parent         0.7390  0.6770   0.8070 -k_M1             0.2990  0.2560   0.3490 -k_M2             0.0202  0.0176   0.0233 -f_parent_to_M1   0.7690  0.6640   0.8480 -f_M1_to_M2       0.7230  0.6030   0.8180", -col.names = c("parameter", "estimate", "lower", "upper")) - -  expect_equivalent(signif(bpar_1[1:6, "Estimate"], 3), bpar_1_mkin_0.9$estimate) -  # Relative difference of lower bound of confidence is < 0.02 -  rel_diff <- function(v1, v2) { -    (v1 - v2)/v2 -  } -  expect_equivalent(rel_diff(bpar_1[1:6, "Lower"], -                             bpar_1_mkin_0.9$lower), -                    rep(0, 6), tolerance = 0.02) +"parent_0       102.0000 +k_parent         0.7390 +k_M1             0.2990 +k_M2             0.0202 +f_parent_to_M1   0.7690 +f_M1_to_M2       0.7230", +col.names = c("parameter", "estimate")) + +  expect_equivalent(signif(bpar_1, 3), bpar_1_mkin_0.9$estimate)  })  test_that("Error model 'obs' works", { @@ -70,117 +63,6 @@ test_that("Error model 'tc' works", {    expect_equivalent(parms_3, c(102.1, 0.7393, 0.2992, 0.0202, 0.7687, 0.7229))  }) -test_that("Reweighting method 'tc' produces reasonable variance estimates", { - -  # Check if we can approximately obtain the parameters and the error model -  # components that were used in the data generation - -  # Parent only -  DFOP <- mkinmod(parent = mkinsub("DFOP")) -  sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) -  parms_DFOP <- c(k1 = 0.2, k2 = 0.02, g = 0.5) -  parms_DFOP_optim <- c(parent_0 = 100, parms_DFOP) - -  d_DFOP <- mkinpredict(DFOP, -     parms_DFOP, c(parent = 100), -     sampling_times) -  d_2_10 <- add_err(d_DFOP, -    sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), -    n = 10, reps = 2, digits = 5, LOD = -Inf, seed = 123456) -  d_100_1 <- add_err(d_DFOP, -    sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), -    n = 1, reps = 100, digits = 5, LOD = -Inf, seed = 123456) - -  # Per default (on my box where I set NOT_CRAN) use all cores minus one -  if (identical(Sys.getenv("NOT_CRAN"), "true")) { -    n_cores <- parallel::detectCores() - 1 -  } else { -    n_cores <- 1 -  } - -  # We are only allowed one core on travis, but they also set NOT_CRAN=true -  if (Sys.getenv("TRAVIS") != "") n_cores = 1 - -  # On Windows we would need to make a cluster first -  if (Sys.info()["sysname"] == "Windows") n_cores = 1 - -  # Unweighted fits -  f_2_10 <- mmkin("DFOP", d_2_10, error_model = "const", quiet = TRUE, -    cores = n_cores) -  parms_2_10 <- apply(sapply(f_2_10, function(x) x$bparms.optim), 1, mean) -  parm_errors_2_10 <- (parms_2_10 - parms_DFOP_optim) / parms_DFOP_optim -  expect_true(all(abs(parm_errors_2_10) < 0.12)) - -  f_2_10_tc <- mmkin("DFOP", d_2_10, error_model = "tc", quiet = TRUE, -    cores = n_cores) -  parms_2_10_tc <- apply(sapply(f_2_10_tc, function(x) x$bparms.optim), 1, mean) -  parm_errors_2_10_tc <- (parms_2_10_tc - parms_DFOP_optim) / parms_DFOP_optim -  expect_true(all(abs(parm_errors_2_10_tc) < 0.05)) - -  tcf_2_10_tc <- apply(sapply(f_2_10_tc, function(x) x$errparms), 1, mean, na.rm = TRUE) - -  tcf_2_10_error_model_errors <- (tcf_2_10_tc - c(0.5, 0.07)) / c(0.5, 0.07) -  expect_true(all(abs(tcf_2_10_error_model_errors) < 0.2)) - -  # When we have 100 replicates in the synthetic data, we can roundtrip -  # the parameters with < 2% precision -  f_tc_100_1 <- mkinfit(DFOP, d_100_1[[1]], error_model = "tc", quiet = TRUE) -  parm_errors_100_1 <- (f_tc_100_1$bparms.optim - parms_DFOP_optim) / parms_DFOP_optim -  expect_true(all(abs(parm_errors_100_1) < 0.02)) - -  tcf_100_1_error_model_errors <- (f_tc_100_1$errparms - c(0.5, 0.07)) / -    c(0.5, 0.07) -  # When maximising the likelihood directly (not using IRLS), we get -  # a precision of < 2% for the error model componentes as well -  expect_true(all(abs(tcf_100_1_error_model_errors) < 0.02)) - -  # Parent and two metabolites -  m_synth_DFOP_lin <- mkinmod(parent = list(type = "DFOP", to = "M1"), -                             M1 = list(type = "SFO", to = "M2"), -                             M2 = list(type = "SFO"), use_of_ff = "max", -                             quiet = TRUE) -  sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) -  parms_DFOP_lin <- c(k1 = 0.2, k2 = 0.02, g = 0.5, -     f_parent_to_M1 = 0.5, k_M1 = 0.3, -     f_M1_to_M2 = 0.7, k_M2 = 0.02) -  d_synth_DFOP_lin <- mkinpredict(m_synth_DFOP_lin, -     parms_DFOP_lin, -     c(parent = 100, M1 = 0, M2 = 0), -     sampling_times) -  parms_DFOP_lin_optim = c(parent_0 = 100, parms_DFOP_lin) - -  d_met_2_15 <- add_err(d_synth_DFOP_lin, -    sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), -    n = 15, reps = 100, digits = 5, LOD = 0.01, seed = 123456) - -  # For a single fit, we get a relative error of less than 10%  in the error -  # model components -  f_met_2_tc_e4 <- mkinfit(m_synth_DFOP_lin, d_met_2_15[[1]], quiet = TRUE, -    error_model = "tc", error_model_algorithm = "direct") -  parm_errors_met_2_tc_e4 <- (f_met_2_tc_e4$errparms - c(0.5, 0.07)) / c(0.5, 0.07) -  expect_true(all(abs(parm_errors_met_2_tc_e4) < 0.1)) - -  # Doing more takes a lot of computing power -  skip_on_travis() -  skip_on_cran() -  f_met_2_15_tc_e4 <- mmkin(list(m_synth_DFOP_lin), d_met_2_15, quiet = TRUE, -                            error_model = "tc", cores = n_cores) - -  parms_met_2_15_tc_e4 <- apply(sapply(f_met_2_15_tc_e4, function(x) x$bparms.optim), 1, mean) -  parm_errors_met_2_15_tc_e4 <- (parms_met_2_15_tc_e4[names(parms_DFOP_lin_optim)] - -                                 parms_DFOP_lin_optim) / parms_DFOP_lin_optim -  expect_true(all(abs(parm_errors_met_2_15_tc_e4) < 0.015)) - -  tcf_met_2_15_tc <- apply(sapply(f_met_2_15_tc_e4, function(x) x$errparms), 1, mean, na.rm = TRUE) - -  tcf_met_2_15_tc_error_model_errors <- (tcf_met_2_15_tc - c(0.5, 0.07)) / -    c(0.5, 0.07) - -  # Here we get a precision < 10% for retrieving the original error model components -  # from 15 datasets -  expect_true(all(abs(tcf_met_2_15_tc_error_model_errors) < 0.10)) -}) -  test_that("The different error model fitting methods work for parent fits", {    skip_on_cran() diff --git a/tests_slow.log b/tests_slow.log new file mode 100644 index 00000000..6337d80a --- /dev/null +++ b/tests_slow.log @@ -0,0 +1,11 @@ +✔ |  OK F W S | Context +✔ |  21       | Fitting of parent only models [39.2 s] +✔ |  15       | test_roundtrip_error_parameters [354.9 s] + +══ Results ═════════════════════════════════════════════════════════════════════ +Duration: 394.1 s + +OK:       36 +Failed:   0 +Warnings: 0 +Skipped:  0 | 
