From 7a1d3d031aa23fce723ac4f4c8e4bb5d64959447 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Thu, 4 Apr 2019 15:42:23 +0200 Subject: Direct error model fitting works - No IRLS required - Removed optimization algorithms other than Port - Removed the dependency on FME - Fitting the error model 'obs' is much faster for the FOCUS_2006_D dataset and the FOMC_SFO model (1 second versus 3.4 seconds) - Vignettes build slower. Compiled models needs 3 minutes instead of 1.5 - For other vignettes, the trend is less clear. Some fits are faster, even for error_model = "const". FOCUS_Z is faster (34.9 s versus 44.1 s) - Standard errors and confidence intervals are slightly smaller - Removed code for plotting during the fit, as I hardly ever used it - Merged the two cost functions (using transformed and untransformed parameters) into one log-likelihood function --- DESCRIPTION | 8 +- NAMESPACE | 4 +- R/mkinfit.R | 499 +++------ build.log | 9 - man/mkinfit.Rd | 46 +- tests/testthat/test_error_models.R | 178 ++++ tests/testthat/test_irls.R | 142 --- vignettes/FOCUS_D.html | 102 +- vignettes/FOCUS_L.html | 1674 ++++++++++++++++++++++++++++--- vignettes/web_only/compiled_models.Rmd | 2 +- vignettes/web_only/compiled_models.html | 92 +- 11 files changed, 1983 insertions(+), 773 deletions(-) create mode 100644 tests/testthat/test_error_models.R delete mode 100644 tests/testthat/test_irls.R diff --git a/DESCRIPTION b/DESCRIPTION index c6746eb7..527d79d1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: mkin Type: Package Title: Kinetic Evaluation of Chemical Degradation Data -Version: 0.9.48.1 -Date: 2019-03-04 +Version: 0.9.49.1 +Date: 2019-04-04 Authors@R: c(person("Johannes", "Ranke", role = c("aut", "cre", "cph"), email = "jranke@uni-bremen.de", comment = c(ORCID = "0000-0003-4371-6538")), @@ -17,8 +17,8 @@ Description: Calculation routines based on the FOCUS Kinetics Report (2006, equation models are solved using compiled C functions. Please note that no warranty is implied for correctness of results or fitness for a particular purpose. 
-Imports: stats, graphics, methods, FME, deSolve, R6, minpack.lm, rootSolve, - inline, parallel, plyr +Imports: stats, graphics, methods, deSolve, R6, inline, parallel, plyr, + numDeriv Suggests: knitr, rbenchmark, tikzDevice, testthat, rmarkdown, covr, vdiffr License: GPL LazyLoad: yes diff --git a/NAMESPACE b/NAMESPACE index 6ca41a43..18adb9c5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,9 +22,6 @@ S3method(mkinpredict, mkinfit) import( stats, graphics, - FME, - minpack.lm, - rootSolve, inline, parallel ) @@ -35,3 +32,4 @@ importFrom(R6, R6Class) importFrom(grDevices, dev.cur) importFrom(plyr, join) importFrom(utils, write.table) +importFrom(numDeriv, hessian) diff --git a/R/mkinfit.R b/R/mkinfit.R index 4ac54ce2..796b25c7 100644 --- a/R/mkinfit.R +++ b/R/mkinfit.R @@ -30,19 +30,12 @@ mkinfit <- function(mkinmod, observed, solution_type = c("auto", "analytical", "eigen", "deSolve"), method.ode = "lsoda", use_compiled = "auto", - method.modFit = c("Port", "Marq", "SANN", "Nelder-Mead", "BFGS", "CG", "L-BFGS-B"), - maxit.modFit = "auto", - control.modFit = list(), + control = list(eval.max = 300, iter.max = 200), transform_rates = TRUE, transform_fractions = TRUE, plot = FALSE, quiet = FALSE, - err = NULL, - weight = c("none", "manual", "std", "mean", "tc"), - tc = c(sigma_low = 0.5, rsd_high = 0.07), - scaleVar = FALSE, atol = 1e-8, rtol = 1e-10, n.outtimes = 100, - reweight.method = NULL, - reweight.tol = 1e-8, reweight.max.iter = 10, + error_model = c("const", "obs", "tc", "obs_tc"), trace_parms = FALSE, ...) { @@ -61,19 +54,6 @@ mkinfit <- function(mkinmod, observed, } } - # Check optimisation method and set maximum number of iterations if specified - method.modFit = match.arg(method.modFit) - if (maxit.modFit != "auto") { - if (method.modFit == "Marq") control.modFit$maxiter = maxit.modFit - if (method.modFit == "Port") { - control.modFit$iter.max = maxit.modFit - control.modFit$eval.max = maxit.modFit - } - if (method.modFit %in% c("SANN", "Nelder-Mead", "BFGS", "CG", "L-BFGS-B")) { - control.modFit$maxit = maxit.modFit - } - } - # Get the names of the state variables in the model mod_vars <- names(mkinmod$diffs) @@ -96,6 +76,9 @@ mkinfit <- function(mkinmod, observed, observed$time <- observed$time - t_of_max } + # Number observations used for fitting + n_observed <- nrow(observed) + # Define starting values for parameters where not specified by the user if (parms.ini[[1]] == "auto") parms.ini = vector() @@ -254,32 +237,42 @@ mkinfit <- function(mkinmod, observed, } } + # Get the error model and specify starting values as well as fixed components + err_mod <- match.arg(error_model) + if (err_mod == "const") { + errparms = c(sigma = 1) + } + if (err_mod == "obs") { + errparms <- rep(1, length(obs_vars)) + names(errparms) = paste0("sigma_", obs_vars) + } + if (err_mod == "tc") { + errparms <- c(sigma_low = 0.5, rsd_high = 0.07) + } + if (err_mod == "obs_tc") { + errparms <- rep(1, length(obs_vars)) + names(errparms) = paste0("sigma_", obs_vars) + errparms <- c(errparms, a = 0.1) + } + # Define outtimes for model solution. 
# Include time points at which observed data are available outtimes = sort(unique(c(observed$time, seq(min(observed$time), max(observed$time), length.out = n.outtimes)))) - weight.ini <- weight <- match.arg(weight) - if (weight.ini == "tc") { - observed$err = sigma_twocomp(observed$value, tc["sigma_low"], tc["rsd_high"]) - err <- "err" - } else { - if (!is.null(err)) weight.ini = "manual" - } - - cost.old <- 1e100 # The first model cost should be smaller than this value calls <- 0 # Counter for number of model solutions out_predicted <- NA - # Define the model cost function for optimisation, including (back)transformations - cost <- function(P) + # Define log-likelihood function for optimisation, including (back)transformations + nlogLik <- function(P, trans = TRUE) { assign("calls", calls+1, inherits=TRUE) # Increase the model solution counter # Trace parameter values if requested if(trace_parms) cat(P, "\n") + # Initial states for t0 if(length(state.ini.optim) > 0) { odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) names(odeini) <- c(state.ini.optim.boxnames, state.ini.fixed.boxnames) @@ -288,13 +281,18 @@ mkinfit <- function(mkinmod, observed, names(odeini) <- state.ini.fixed.boxnames } - odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], transparms.fixed) + odeparms.optim <- P[(length(state.ini.optim) + 1):(length(P) - length(errparms))] - parms <- backtransform_odeparms(odeparms, mkinmod, - transform_rates = transform_rates, - transform_fractions = transform_fractions) + if (trans == TRUE) { + odeparms <- c(odeparms.optim, transparms.fixed) + parms <- backtransform_odeparms(odeparms, mkinmod, + transform_rates = transform_rates, + transform_fractions = transform_fractions) + } else { + parms <- c(odeparms.optim, parms.fixed) + } - # Solve the system with current transformed parameter values + # Solve the system with current parameter values out <- mkinpredict(mkinmod, parms, odeini, outtimes, solution_type = solution_type, @@ -302,82 +300,55 @@ mkinfit <- function(mkinmod, observed, method.ode = method.ode, atol = atol, rtol = rtol, ...) - assign("out_predicted", out, inherits=TRUE) - - mC <- modCost(out, observed, y = "value", - err = err, weight = weight, scaleVar = scaleVar) - - # Report and/or plot if the model is improved - if (mC$model < cost.old) { - if(!quiet) cat("Model cost at call ", calls, ": ", mC$model, "\n") - - # Plot the data and current model output if requested - if(plot) { - outtimes_plot = seq(min(observed$time), max(observed$time), length.out=100) - - out_plot <- mkinpredict(mkinmod, parms, - odeini, outtimes_plot, - solution_type = solution_type, - use_compiled = use_compiled, - method.ode = method.ode, - atol = atol, rtol = rtol, ...) 
- - plot(0, type="n", - xlim = range(observed$time), ylim = c(0, max(observed$value, na.rm=TRUE)), - xlab = "Time", ylab = "Observed") - col_obs <- pch_obs <- 1:length(obs_vars) - lty_obs <- rep(1, length(obs_vars)) - names(col_obs) <- names(pch_obs) <- names(lty_obs) <- obs_vars - for (obs_var in obs_vars) { - points(subset(observed, name == obs_var, c(time, value)), - pch = pch_obs[obs_var], col = col_obs[obs_var]) - } - matlines(out_plot$time, out_plot[-1], col = col_obs, lty = lty_obs) - legend("topright", inset=c(0.05, 0.05), legend=obs_vars, - col=col_obs, pch=pch_obs, lty=1:length(pch_obs)) - } + out_long <- mkin_wide_to_long(out, time = "time") - assign("cost.old", mC$model, inherits=TRUE) - } - return(mC) - } + assign("out_predicted", out_long, inherits=TRUE) - # Define the model cost function for the t-test, without parameter transformation - cost_notrans <- function(P) - { - if(length(state.ini.optim) > 0) { - odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) - names(odeini) <- c(state.ini.optim.boxnames, state.ini.fixed.boxnames) - } else { - odeini <- state.ini.fixed - names(odeini) <- state.ini.fixed.boxnames + if (err_mod == "const") { + observed$std <- P["sigma"] + } + if (err_mod == "obs") { + std_names <- paste0("sigma_", observed$name) + observed$std <- P[std_names] + } + if (err_mod == "tc") { + tmp <- merge(observed, out_long, by = c("time", "name")) + tmp$name <- ordered(tmp$name, levels = obs_vars) + tmp <- tmp[order(tmp$name, tmp$time), ] + observed$std <- sqrt(P["sigma_low"]^2 + tmp$value.y^2 * P["rsd_high"]^2) + } + if (err_mod == "obs_tc") { + tmp <- merge(observed, out_long, by = c("time", "name")) + tmp$name <- ordered(tmp$name, levels = obs_vars) + tmp <- tmp[order(tmp$name, tmp$time), ] + std_names <- paste0("sigma_", observed$name) + observed$std <- sqrt(P[std_names] * (1 + P["a"] * tmp$value.y^2)) } - odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed) - - # Solve the system with current parameter values - out <- mkinpredict(mkinmod, odeparms, - odeini, outtimes, - solution_type = solution_type, - use_compiled = use_compiled, - method.ode = method.ode, - atol = atol, rtol = rtol, ...) - - mC <- modCost(out, observed, y = "value", - err = err, weight = weight, scaleVar = scaleVar) + data_log_lik <- merge(observed[c("name", "time", "value", "std")], out_long, + by = c("name", "time")) - return(mC) + nlogLik <- - with(data_log_lik, + sum(dnorm(x = value.x, mean = value.y, sd = std, log = TRUE))) + return(nlogLik) } + n_optim <- length(c(state.ini.optim, transparms.optim, errparms)) + names_optim <- c(names(state.ini.optim), + names(transparms.optim), + names(errparms)) + # Define lower and upper bounds other than -Inf and Inf for parameters - # for which no internal transformation is requested in the call to mkinfit. 
- lower <- rep(-Inf, length(c(state.ini.optim, transparms.optim))) - upper <- rep(Inf, length(c(state.ini.optim, transparms.optim))) - names(lower) <- names(upper) <- c(names(state.ini.optim), names(transparms.optim)) + # for which no internal transformation is requested in the call to mkinfit + # and for error model parameters + lower <- rep(-Inf, n_optim) + upper <- rep(Inf, n_optim) + names(lower) <- names(upper) <- names_optim # IORE exponentes are not transformed, but need a lower bound of zero index_N <- grep("^N", names(lower)) lower[index_N] <- 0 + if (!transform_rates) { index_k <- grep("^k_", names(lower)) lower[index_k] <- 0 @@ -396,151 +367,44 @@ mkinfit <- function(mkinmod, observed, upper[other_fraction_parms] <- 1 } + if (err_mod == "const") { + lower["sigma"] <- 0 + } + if (err_mod == "obs") { + index_sigma <- grep("^sigma_", names(lower)) + lower[index_sigma] <- 0 + } + if (err_mod == "tc") { + lower["sigma_low"] <- 0 + lower["rsd_high"] <- 0 + } + if (err_mod == "obs_tc") { + index_sigma <- grep("^sigma_", names(lower)) + lower[index_sigma] <- 0 + lower["a"] <- 0 + } + # Show parameter names if tracing is requested - if(trace_parms) cat(names(c(state.ini.optim, transparms.optim)), "\n") + if(trace_parms) cat(names_optim, "\n") # Do the fit and take the time fit_time <- system.time({ - fit <- modFit(cost, c(state.ini.optim, transparms.optim), - method = method.modFit, control = control.modFit, - lower = lower, upper = upper, ...) - - # Reiterate the fit until convergence of the variance components (IRLS) - # if requested by the user - - if (!is.null(reweight.method)) { - if (! reweight.method %in% c("obs", "tc")) stop("Only reweighting methods 'obs' and 'tc' are implemented") - - if (reweight.method == "obs") { - tc_fit <- NA - if(!quiet) { - cat("IRLS based on variance estimates for each observed variable\n") - cat("Initial variance estimates are:\n") - print(signif(fit$var_ms_unweighted, 8)) - } - } - if (reweight.method == "tc") { - tc_fit <- .fit_error_model_mad_obs(cost(fit$par)$residuals, tc, 0) - - if (is.character(tc_fit)) { - if (!quiet) { - cat(tc_fit, ".\n", "No reweighting will be performed.") - } - tc_fitted <- c(sigma_low = NA, rsd_high = NA) - } else { - tc_fitted <- coef(tc_fit) - if(!quiet) { - cat("IRLS based on variance estimates according to the two component error model\n") - cat("Initial variance components are:\n") - print(signif(tc_fitted)) - } - } - } - reweight.diff = 1 - n.iter <- 0 - if (!is.null(err)) observed$err.ini <- observed[[err]] - err = "err.irls" - - while (reweight.diff > reweight.tol & - n.iter < reweight.max.iter & - !is.character(tc_fit)) { - n.iter <- n.iter + 1 - # Store squared residual predictors used for weighting in sr_old and define new weights - if (reweight.method == "obs") { - sr_old <- fit$var_ms_unweighted - observed[err] <- sqrt(fit$var_ms_unweighted[as.character(observed$name)]) - } - if (reweight.method == "tc") { - sr_old <- tc_fitted - - tmp_predicted <- mkin_wide_to_long(out_predicted, time = "time") - tmp_data <- suppressMessages(join(observed, tmp_predicted, by = c("time", "name"))) - - #observed[err] <- predict(tc_fit, newdata = data.frame(mod = tmp_data[[4]])) - observed[err] <- predict(tc_fit, newdata = data.frame(obs = observed$value)) - - } - fit <- modFit(cost, fit$par, method = method.modFit, - control = control.modFit, lower = lower, upper = upper, ...) 
+ fit <- nlminb(c(state.ini.optim, transparms.optim, errparms), + nlogLik, control = control, + lower = lower, upper = upper, ...)}) - if (reweight.method == "obs") { - sr_new <- fit$var_ms_unweighted - } - if (reweight.method == "tc") { - tc_fit <- .fit_error_model_mad_obs(cost(fit$par)$residuals, tc_fitted, n.iter) - - if (is.character(tc_fit)) { - if (!quiet) { - cat(tc_fit, ".\n") - } - break - } else { - tc_fitted <- coef(tc_fit) - sr_new <- tc_fitted - } - } - - reweight.diff = sum((sr_new - sr_old)^2) - if (!quiet) { - cat("Iteration", n.iter, "yields variance estimates:\n") - print(signif(sr_new, 8)) - cat("Sum of squared differences to last variance (component) estimates:", - signif(reweight.diff, 2), "\n") - } - } - } - }) - - # Check for convergence - if (method.modFit == "Marq") { - if (!fit$info %in% c(1, 2, 3)) { - fit$warning = paste0("Optimisation by method ", method.modFit, - " did not converge.\n", - "The message returned by nls.lm is:\n", - fit$message) - warning(fit$warning) - } - else { - if(!quiet) cat("Optimisation by method", method.modFit, "successfully terminated.\n") - } - } - if (method.modFit == "Port") { - if (fit$convergence != 0) { - fit$warning = paste0("Optimisation by method ", method.modFit, - " did not converge:\n", - if(is.character(fit$counts)) fit$counts # FME bug - else fit$message) - warning(fit$warning) - } else { - if(!quiet) cat("Optimisation by method", method.modFit, "successfully terminated.\n") - } - } - if (method.modFit %in% c("SANN", "Nelder-Mead", "BFGS", "CG", "L-BFGS-B")) { - if (fit$convergence != 0) { - fit$warning = paste0("Optimisation by method ", method.modFit, - " did not converge.\n", - "Convergence code returned by optim() is", fit$convergence) - warning(fit$warning) - } else { - if(!quiet) cat("Optimisation by method", method.modFit, "successfully terminated.\n") - } - } - - # Return number of iterations for SANN method and alert user to check if - # the approximation to the optimum is sufficient - if (method.modFit == "SANN") { - fit$iter = maxit.modFit - fit$warning <- paste0("Termination of the SANN algorithm does not imply convergence.\n", - "Make sure the approximation of the optimum is adequate.") + if (fit$convergence != 0) { + fit$warning = paste0("Optimisation did not converge:\n", fit$message) warning(fit$warning) + } else { + if(!quiet) cat("Optimisation successfully terminated.\n") } # We need to return some more data for summary and plotting fit$solution_type <- solution_type fit$transform_rates <- transform_rates fit$transform_fractions <- transform_fractions - fit$method.modFit <- method.modFit - fit$maxit.modFit <- maxit.modFit + fit$control <- control fit$calls <- calls fit$time <- fit_time @@ -550,40 +414,30 @@ mkinfit <- function(mkinmod, observed, # We need data and predictions for summary and plotting fit$observed <- observed fit$obs_vars <- obs_vars - fit$predicted <- mkin_wide_to_long(out_predicted, time = "time") + fit$predicted <- out_predicted # Backtransform parameters bparms.optim = backtransform_odeparms(fit$par, fit$mkinmod, - transform_rates = transform_rates, - transform_fractions = transform_fractions) + transform_rates = transform_rates, + transform_fractions = transform_fractions) bparms.fixed = c(state.ini.fixed, parms.fixed) bparms.all = c(bparms.optim, parms.fixed) - # Attach the cost functions to the fit for post-hoc parameter uncertainty analysis - fit$cost <- cost - fit$cost_notrans <- cost_notrans - - # Estimate the Hessian for the model cost without parameter transformations - # to 
make it possible to obtain the usual t-test - # Code ported from FME::modFit - Jac_notrans <- try(gradient(function(p, ...) cost_notrans(p)$residuals$res, - bparms.optim, centered = TRUE), silent = TRUE) - if (inherits(Jac_notrans, "try-error")) { - warning("Calculation of the Jacobian failed for the cost function of the untransformed model.\n", - "No t-test results will be available") - fit$hessian_notrans <- NA - } else { - fit$hessian_notrans <- 2 * t(Jac_notrans) %*% Jac_notrans - } + # Attach the negative log-likelihood function for post-hoc parameter uncertainty analysis + fit$nlogLik <- nlogLik + + fit$hessian <- hessian(nlogLik, fit$par) + fit$hessian_notrans <- hessian(nlogLik, c(bparms.optim, fit$par[names(errparms)]), trans = FALSE) # Collect initial parameter values in three dataframes fit$start <- data.frame(value = c(state.ini.optim, - parms.optim)) + parms.optim, errparms)) fit$start$type = c(rep("state", length(state.ini.optim)), - rep("deparm", length(parms.optim))) + rep("deparm", length(parms.optim)), + rep("error", length(errparms))) fit$start_transformed = data.frame( - value = c(state.ini.optim, transparms.optim), + value = c(state.ini.optim, transparms.optim, errparms), lower = lower, upper = upper) @@ -596,28 +450,16 @@ mkinfit <- function(mkinmod, observed, data$name <- ordered(data$name, levels = obs_vars) data <- data[order(data$name, data$time), ] - # Add a column named "value" holding the observed values for the case - # that this column was selected for manual weighting, so it can be - # shown in the summary as "err" - data$value <- data$value.x - fit$data <- data.frame(time = data$time, variable = data$name, observed = data$value.x, predicted = data$value.y) fit$data$residual <- fit$data$observed - fit$data$predicted - if (!is.null(data$err.ini)) fit$data$err.ini <- data$err.ini - if (!is.null(err)) fit$data[["err"]] <- data[[err]] fit$atol <- atol fit$rtol <- rtol - fit$weight.ini <- weight.ini - fit$tc.ini <- tc - fit$reweight.method <- reweight.method - fit$reweight.tol <- reweight.tol - fit$reweight.max.iter <- reweight.max.iter - if (exists("tc_fitted")) fit$tc_fitted <- tc_fitted + fit$err_mod <- err_mod # Return different sets of backtransformed parameters for summary and plotting fit$bparms.optim <- bparms.optim @@ -629,6 +471,9 @@ mkinfit <- function(mkinmod, observed, state.ini.fixed) names(fit$bparms.state) <- gsub("_0$", "", names(fit$bparms.state)) + fit$errparms.optim <- fit$par[names(errparms)] + fit$df.residual <- n_observed - n_optim + fit$date <- date() fit$version <- as.character(utils::packageVersion("mkin")) fit$Rversion <- paste(R.version$major, R.version$minor, sep=".") @@ -641,39 +486,41 @@ summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, alpha = 0.05, param <- object$par pnames <- names(param) bpnames <- names(object$bparms.optim) + epnames <- names(object$errparms.optim) p <- length(param) mod_vars <- names(object$mkinmod$diffs) - covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance - covar_notrans <- try(solve(0.5*object$hessian_notrans), silent = TRUE) # unscaled covariance - rdf <- object$df.residual - resvar <- object$ssr / rdf + covar <- try(solve(object$hessian), silent = TRUE) + covar_notrans <- try(solve(object$hessian_notrans), silent = TRUE) + rdf <- object$df.residual + if (!is.numeric(covar) | is.na(covar[1])) { covar <- NULL se <- lci <- uci <- rep(NA, p) } else { rownames(covar) <- colnames(covar) <- pnames - se <- sqrt(diag(covar) * resvar) + se <- sqrt(diag(covar)) lci <- param 
+ qt(alpha/2, rdf) * se uci <- param + qt(1-alpha/2, rdf) * se } - + + beparms.optim <- c(object$bparms.optim, object$par[epnames]) if (!is.numeric(covar_notrans) | is.na(covar_notrans[1])) { covar_notrans <- NULL se_notrans <- tval <- pval <- rep(NA, p) } else { - rownames(covar_notrans) <- colnames(covar_notrans) <- bpnames - se_notrans <- sqrt(diag(covar_notrans) * resvar) - tval <- object$bparms.optim/se_notrans - pval <- pt(abs(tval), rdf, lower.tail = FALSE) + rownames(covar_notrans) <- colnames(covar_notrans) <- c(bpnames, epnames) + se_notrans <- sqrt(diag(covar_notrans)) + tval <- beparms.optim / se_notrans + pval <- pt(abs(tval), rdf, lower.tail = FALSE) } names(se) <- pnames - modVariance <- object$ssr / length(object$residuals) param <- cbind(param, se, lci, uci) dimnames(param) <- list(pnames, c("Estimate", "Std. Error", "Lower", "Upper")) - bparam <- cbind(Estimate = object$bparms.optim, se_notrans, "t value" = tval, "Pr(>t)" = pval, Lower = NA, Upper = NA) + bparam <- cbind(Estimate = beparms.optim, se_notrans, + "t value" = tval, "Pr(>t)" = pval, Lower = NA, Upper = NA) # Transform boundaries of CI for one parameter at a time, # with the exception of sets of formation fractions (single fractions are OK). @@ -699,6 +546,7 @@ summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, alpha = 0.05, bparam[names(bpu), "Upper"] <- bpu } } + bparam[epnames, c("Lower", "Upper")] <- param[epnames, c("Lower", "Upper")] ans <- list( version = as.character(utils::packageVersion("mkin")), @@ -706,26 +554,14 @@ summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, alpha = 0.05, date.fit = object$date, date.summary = date(), solution_type = object$solution_type, - method.modFit = object$method.modFit, warning = object$warning, use_of_ff = object$mkinmod$use_of_ff, - weight.ini = object$weight.ini, - reweight.method = object$reweight.method, - tc.ini = object$tc.ini, - var_ms_unweighted = object$var_ms_unweighted, - tc_fitted = object$tc_fitted, - residuals = object$residuals, - residualVariance = resvar, - sigma = sqrt(resvar), - modVariance = modVariance, df = c(p, rdf), cov.unscaled = covar, - cov.scaled = covar * resvar, - info = object$info, + #cov.scaled = covar * resvar, niter = object$iterations, calls = object$calls, time = object$time, - stopmess = message, par = param, bpar = bparam) @@ -789,24 +625,8 @@ print.summary.mkinfit <- function(x, digits = max(3, getOption("digits") - 3), . cat("\nFitted with method", x$method.modFit, "using", x$calls, "model solutions performed in", x$time[["elapsed"]], "s\n") - cat("\nWeighting:", x$weight.ini) - if (x$weight.ini == "tc") { - cat(" with variance components\n") - print(x$tc.ini) - } else { - cat ("\n") - } - if(!is.null(x$reweight.method)) { - cat("\nIterative reweighting with method", x$reweight.method, "\n") - if (x$reweight.method == "obs") { - cat("Final mean squared residuals of observed variables:\n") - print(x$var_ms_unweighted) - } - if (x$reweight.method == "tc") { - cat("Final components of fitted standard deviation:\n") - print(x$tc_fitted) - } - } + cat("\nError model:\n") + print(x$err_mod) cat("\nStarting values for parameters to be optimised:\n") print(x$start) @@ -830,9 +650,6 @@ print.summary.mkinfit <- function(x, digits = max(3, getOption("digits") - 3), . 
} } - cat("\nResidual standard error:", - format(signif(x$sigma, digits)), "on", rdf, "degrees of freedom\n") - cat("\nBacktransformed parameters:\n") cat("Confidence intervals for internally transformed parameters are asymmetric.\n") if ((x$version) < "0.9-36") { @@ -874,54 +691,4 @@ print.summary.mkinfit <- function(x, digits = max(3, getOption("digits") - 3), . invisible(x) } - -# Fit the median absolute deviation against the observed values, -# using the current error model for weighting -.fit_error_model_mad_obs <- function(tmp_res, tc, iteration) { - mad_agg <- aggregate(tmp_res$res.unweighted, - by = list(name = tmp_res$name, time = tmp_res$x), - FUN = function(x) mad(x, center = 0)) - names(mad_agg) <- c("name", "time", "mad") - error_data <- suppressMessages( - join(data.frame(name = tmp_res$name, - time = tmp_res$x, - obs = tmp_res$obs), - mad_agg)) - error_data_complete <- na.omit(error_data) - - tc_fit <- tryCatch( - nls(mad ~ sigma_twocomp(obs, sigma_low, rsd_high), - start = list(sigma_low = tc["sigma_low"], rsd_high = tc["rsd_high"]), - weights = 1/sigma_twocomp(error_data_complete$obs, - tc["sigma_low"], - tc["rsd_high"])^2, - data = error_data_complete, - lower = 0, - algorithm = "port"), - error = function(e) paste("Fitting the error model failed in iteration", iteration)) - return(tc_fit) -} -# Alternative way to fit the error model, fitting to modelled instead of -# observed values -# .fit_error_model_mad_mod <- function(tmp_res, tc) { -# mad_agg <- aggregate(tmp_res$res.unweighted, -# by = list(name = tmp_res$name, time = tmp_res$x), -# FUN = function(x) mad(x, center = 0)) -# names(mad_agg) <- c("name", "time", "mad") -# mod_agg <- aggregate(tmp_res$mod, -# by = list(name = tmp_res$name, time = tmp_res$x), -# FUN = mean) -# names(mod_agg) <- c("name", "time", "mod") -# mod_mad <- merge(mod_agg, mad_agg) -# -# tc_fit <- tryCatch( -# nls(mad ~ sigma_twocomp(mod, sigma_low, rsd_high), -# start = list(sigma_low = tc["sigma_low"], rsd_high = tc["rsd_high"]), -# data = mod_mad, -# weights = 1/mod_mad$mad, -# lower = 0, -# algorithm = "port"), -# error = "Fitting the error model failed in iteration") -# return(tc_fit) -# } # vim: set ts=2 sw=2 expandtab: diff --git a/build.log b/build.log index 6831459d..976d27d4 100644 --- a/build.log +++ b/build.log @@ -1,10 +1 @@ * checking for file ‘./DESCRIPTION’ ... OK -* preparing ‘mkin’: -* checking DESCRIPTION meta-information ... OK -* installing the package to build vignettes -* creating vignettes ... OK -* checking for LF line-endings in source and make files and shell scripts -* checking for empty or unneeded directories -* looking to see if a ‘data/datalist’ file should be added -* building ‘mkin_0.9.48.1.tar.gz’ - diff --git a/man/mkinfit.Rd b/man/mkinfit.Rd index 228bab24..59bb5e5f 100644 --- a/man/mkinfit.Rd +++ b/man/mkinfit.Rd @@ -35,8 +35,7 @@ mkinfit(mkinmod, observed, tc = c(sigma_low = 0.5, rsd_high = 0.07), scaleVar = FALSE, atol = 1e-8, rtol = 1e-10, n.outtimes = 100, - reweight.method = NULL, - reweight.tol = 1e-8, reweight.max.iter = 10, + error_model = c("auto", "obs", "tc", "const"), trace_parms = FALSE, ...) } \arguments{ @@ -202,33 +201,22 @@ mkinfit(mkinmod, observed, the numerical solver if that is used (see \code{solution_type} argument. The default value is 100. } - \item{reweight.method}{ - The method used for iteratively reweighting residuals, also known - as iteratively reweighted least squares (IRLS). Default is NULL, - i.e. no iterative weighting. 
- The first reweighting method is called "obs", meaning that each - observed variable is assumed to have its own variance. This variance - is estimated from the fit (mean squared residuals) and used for weighting - the residuals in each iteration until convergence of this estimate up to - \code{reweight.tol} or up to the maximum number of iterations - specified by \code{reweight.max.iter}. - The second reweighting method is called "tc" (two-component error model). - When using this method, the two components of an error model similar to - the one described by - Rocke and Lorenzato (1995) are estimated from the fit and the resulting - variances are used for weighting the residuals in each iteration until - convergence of these components or up to the maximum number of iterations - specified. Note that this method deviates from the model by Rocke and - Lorenzato, as their model implies that the errors follow a lognormal - distribution for large values, not a normal distribution as assumed by this - method. - } - \item{reweight.tol}{ - Tolerance for convergence criterion for the variance components - in IRLS fits. - } - \item{reweight.max.iter}{ - Maximum iterations in IRLS fits. + \item{error_model}{ + If the error model is "auto", the generalised error model described by Ranke + et al. (2019) is used for specifying the likelihood function. Simplications + of this error model are tested as well and the model yielding the lowest + AIC is returned. + + If the error model is "obs", each observed variable is assumed to have its + own variance. + + If the error model is "tc" (two-component error model). + When using this method, a two component error model similar to the + one described by Rocke and Lorenzato (1995) is used for setting up + the likelihood function, as described in the abovementioned paper. + Note that this model deviates from the model by Rocke and Lorenzato, as + their model implies that the errors follow a lognormal distribution for + large values, not a normal distribution as assumed by this method. } \item{trace_parms}{ Should a trace of the parameter values be listed? diff --git a/tests/testthat/test_error_models.R b/tests/testthat/test_error_models.R new file mode 100644 index 00000000..bda8ca7f --- /dev/null +++ b/tests/testthat/test_error_models.R @@ -0,0 +1,178 @@ +# Copyright (C) 2018 Johannes Ranke +# Contact: jranke@uni-bremen.de + +# This file is part of the R package mkin + +# mkin is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. + +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. + +# You should have received a copy of the GNU General Public License along with +# this program. 
If not, see + +context("Error model fitting") + +m_synth_SFO_lin <- mkinmod(parent = mkinsub("SFO", "M1"), + M1 = mkinsub("SFO", "M2"), + M2 = mkinsub("SFO"), + use_of_ff = "max", quiet = TRUE) + +m_synth_DFOP_par <- mkinmod(parent = mkinsub("DFOP", c("M1", "M2")), + M1 = mkinsub("SFO"), + M2 = mkinsub("SFO"), + use_of_ff = "max", quiet = TRUE) + +SFO_lin_a <- synthetic_data_for_UBA_2014[[1]]$data + +DFOP_par_c <- synthetic_data_for_UBA_2014[[12]]$data + +test_that("Error model 'const' works", { + skip_on_cran() + fit_const_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "const", quiet = TRUE) + bpar_1 <- summary(fit_const_1)$bpar[, c("Estimate", "Lower", "Upper")] + # The reference used here is mkin 0.9.48.1 + bpar_1_mkin_0.9 <- read.table(text = +"parent_0 102.0000 98.6000 106.0000 +k_parent 0.7390 0.6770 0.8070 +k_M1 0.2990 0.2560 0.3490 +k_M2 0.0202 0.0176 0.0233 +f_parent_to_M1 0.7690 0.6640 0.8480 +f_M1_to_M2 0.7230 0.6030 0.8180", +col.names = c("parameter", "estimate", "lower", "upper")) + + expect_equivalent(signif(bpar_1[1:6, "Estimate"], 3), bpar_1_mkin_0.9$estimate) + # Relative difference of lower bound of confidence is < 0.02 + rel_diff <- function(v1, v2) { + (v1 - v2)/v2 + } + expect_equivalent(rel_diff(bpar_1[1:6, "Lower"], + bpar_1_mkin_0.9$lower), + rep(0, 6), tolerance = 0.02) +}) + +test_that("Error model 'obs' works", { + skip_on_cran() + fit_obs_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "obs", quiet = TRUE) + parms_2 <- round(fit_obs_1$bparms.optim, c(1, 4, 4, 4, 4, 4)) + expect_equivalent(parms_2, c(102.1, 0.7389, 0.2982, 0.0203, 0.7677, 0.7246)) +}) + +test_that("Error model 'tc' works", { + skip_on_cran() + fit_tc_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "tc", quiet = TRUE) + parms_3 <- round(fit_tc_1$bparms.optim, c(1, 4, 4, 4, 4, 4)) + expect_equivalent(parms_3, c(102.1, 0.7393, 0.2992, 0.0202, 0.7687, 0.7229)) +}) + +test_that("Error model 'obs_tc' works", { + skip_on_cran() + fit_obs_tc_1 <- expect_warning(mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "obs_tc", quiet = TRUE), "NaN") + # Here the error model is overparameterised + expect_warning(summary(fit_obs_tc_1), "singular system") +}) + +test_that("Reweighting method 'tc' produces reasonable variance estimates", { + + # I need to make the tc method more robust against that + # skip_on_cran() + + # Check if we can approximately obtain the parameters and the error model + # components that were used in the data generation + + # Parent only + DFOP <- mkinmod(parent = mkinsub("DFOP")) + sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) + parms_DFOP <- c(k1 = 0.2, k2 = 0.02, g = 0.5) + parms_DFOP_optim <- c(parent_0 = 100, parms_DFOP) + + d_DFOP <- mkinpredict(DFOP, + parms_DFOP, c(parent = 100), + sampling_times) + d_2_10 <- add_err(d_DFOP, + sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), + n = 10, reps = 2, digits = 5, LOD = -Inf, seed = 123456) + d_100_1 <- add_err(d_DFOP, + sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), + n = 1, reps = 100, digits = 5, LOD = -Inf, seed = 123456) + + # Unweighted fits + f_2_10 <- mmkin("DFOP", d_2_10, error_model = "const", quiet = TRUE, + cores = if (Sys.getenv("TRAVIS") != "") 1 else 15) + parms_2_10 <- apply(sapply(f_2_10, function(x) x$bparms.optim), 1, mean) + parm_errors_2_10 <- (parms_2_10 - parms_DFOP_optim) / parms_DFOP_optim + expect_true(all(abs(parm_errors_2_10) < 0.12)) + + f_2_10_tc <- mmkin("DFOP", d_2_10, error_model = "tc", quiet = TRUE, + cores = if (Sys.getenv("TRAVIS") != "") 1 else 15) + parms_2_10_tc <- 
apply(sapply(f_2_10_tc, function(x) x$bparms.optim), 1, mean) + parm_errors_2_10_tc <- (parms_2_10_tc - parms_DFOP_optim) / parms_DFOP_optim + expect_true(all(abs(parm_errors_2_10_tc) < 0.05)) + + tcf_2_10_tc <- apply(sapply(f_2_10_tc, function(x) x$errparms), 1, mean, na.rm = TRUE) + + tcf_2_10_error_model_errors <- (tcf_2_10_tc - c(0.5, 0.07)) / c(0.5, 0.07) + expect_true(all(abs(tcf_2_10_error_model_errors) < 0.2)) + + # When we have 100 replicates in the synthetic data, we can roundtrip + # the parameters with < 2% precision + f_tc_100_1 <- mkinfit(DFOP, d_100_1[[1]], error_model = "tc", quiet = TRUE) + parm_errors_100_1 <- (f_tc_100_1$bparms.optim - parms_DFOP_optim) / parms_DFOP_optim + expect_true(all(abs(parm_errors_100_1) < 0.02)) + + tcf_100_1_error_model_errors <- (f_tc_100_1$errparms - c(0.5, 0.07)) / + c(0.5, 0.07) + # When maximising the likelihood directly (not using IRLS), we get + # a precision of < 2% for the error model componentes as well + expect_true(all(abs(tcf_100_1_error_model_errors) < 0.02)) + + # Parent and two metabolites + m_synth_DFOP_lin <- mkinmod(parent = list(type = "DFOP", to = "M1"), + M1 = list(type = "SFO", to = "M2"), + M2 = list(type = "SFO"), use_of_ff = "max", + quiet = TRUE) + sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) + parms_DFOP_lin <- c(k1 = 0.2, k2 = 0.02, g = 0.5, + f_parent_to_M1 = 0.5, k_M1 = 0.3, + f_M1_to_M2 = 0.7, k_M2 = 0.02) + d_synth_DFOP_lin <- mkinpredict(m_synth_DFOP_lin, + parms_DFOP_lin, + c(parent = 100, M1 = 0, M2 = 0), + sampling_times) + parms_DFOP_lin_optim = c(parent_0 = 100, parms_DFOP_lin) + + d_met_2_15 <- add_err(d_synth_DFOP_lin, + sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), + n = 15, reps = 100, digits = 5, LOD = -Inf, seed = 123456) + + # For a single fit, we get a relative error of less than 10% in the error + # model components + f_met_2_tc_e4 <- mkinfit(m_synth_DFOP_lin, d_met_2_15[[1]], quiet = TRUE, + error_model = "tc") + parm_errors_met_2_tc_e4 <- (f_met_2_tc_e4$errparms - c(0.5, 0.07)) / c(0.5, 0.07) + expect_true(all(abs(parm_errors_met_2_tc_e4) < 0.1)) + + # Doing more takes a lot of computing power + skip_on_travis() + f_met_2_15_tc_e4 <- mmkin(list(m_synth_DFOP_lin), d_met_2_15, quiet = TRUE, + error_model = "tc", cores = 15) + + parms_met_2_15_tc_e4 <- apply(sapply(f_met_2_15_tc_e4, function(x) x$bparms.optim), 1, mean) + parm_errors_met_2_15_tc_e4 <- (parms_met_2_15_tc_e4[names(parms_DFOP_lin_optim)] - + parms_DFOP_lin_optim) / parms_DFOP_lin_optim + expect_true(all(abs(parm_errors_met_2_15_tc_e4) < 0.01)) + + tcf_met_2_15_tc <- apply(sapply(f_met_2_15_tc_e4, function(x) x$errparms), 1, mean, na.rm = TRUE) + + tcf_met_2_15_tc_error_model_errors <- (tcf_met_2_15_tc - c(0.5, 0.07)) / + c(0.5, 0.07) + + # Here we get a precision < 15% for retrieving the original error model components + # from 15 datasets + expect_true(all(abs(tcf_met_2_15_tc_error_model_errors) < 0.15)) +}) diff --git a/tests/testthat/test_irls.R b/tests/testthat/test_irls.R deleted file mode 100644 index f61f793d..00000000 --- a/tests/testthat/test_irls.R +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (C) 2018 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. 
- -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see - -context("Iteratively reweighted least squares (IRLS) fitting") - - -m_synth_SFO_lin <- mkinmod(parent = mkinsub("SFO", "M1"), - M1 = mkinsub("SFO", "M2"), - M2 = mkinsub("SFO"), - use_of_ff = "max", quiet = TRUE) - -m_synth_DFOP_par <- mkinmod(parent = mkinsub("DFOP", c("M1", "M2")), - M1 = mkinsub("SFO"), - M2 = mkinsub("SFO"), - use_of_ff = "max", quiet = TRUE) - -SFO_lin_a <- synthetic_data_for_UBA_2014[[1]]$data - -DFOP_par_c <- synthetic_data_for_UBA_2014[[12]]$data - -test_that("Reweighting method 'obs' works", { - skip_on_cran() - fit_irls_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, reweight.method = "obs", quiet = TRUE) - parms_1 <- round(fit_irls_1$bparms.optim, c(1, 4, 4, 4, 4, 4)) - expect_equivalent(parms_1, c(102.1, 0.7389, 0.2982, 0.0203, 0.7677, 0.7246)) -}) - -test_that("Reweighting method 'tc' works", { - fit_irls_2 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, reweight.method = "tc", quiet = TRUE) - parms_2 <- round(fit_irls_2$bparms.optim, c(1, 4, 4, 4, 4, 4)) - expect_equivalent(parms_2, c(102.1, 0.7393, 0.2992, 0.0202, 0.7687, 0.7229)) - - skip("Too much trouble with datasets that are randomly generated") - # I need to make the tc method more robust against that - # skip_on_cran() - - # Check if we can approximately obtain the parameters and the error model - # components that were used in the data generation - - # Parent only - DFOP <- mkinmod(parent = mkinsub("DFOP")) - sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) - parms_DFOP <- c(k1 = 0.2, k2 = 0.02, g = 0.5) - parms_DFOP_optim <- c(parent_0 = 100, parms_DFOP) - d_DFOP <- mkinpredict(DFOP, - parms_DFOP, c(parent = 100), - sampling_times) - d_2_10 <- add_err(d_DFOP, - sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), - n = 10, reps = 2, digits = 5, LOD = -Inf) - d_100_1 <- add_err(d_DFOP, - sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), - n = 1, reps = 100, digits = 5, LOD = -Inf) - - f_2_10 <- mmkin("DFOP", d_2_10, quiet = TRUE, - cores = if (Sys.getenv("TRAVIS") != "") 1 else 15) - parms_2_10 <- apply(sapply(f_2_10, function(x) x$bparms.optim), 1, mean) - parm_errors_2_10 <- (parms_2_10 - parms_DFOP_optim) / parms_DFOP_optim - expect_true(all(abs(parm_errors_2_10) < 0.45)) - - f_2_10_tc <- mmkin("DFOP", d_2_10, reweight.method = "tc", quiet = TRUE, - cores = if (Sys.getenv("TRAVIS") != "") 1 else 15) - parms_2_10_tc <- apply(sapply(f_2_10_tc, function(x) x$bparms.optim), 1, mean) - parm_errors_2_10_tc <- (parms_2_10_tc - parms_DFOP_optim) / parms_DFOP_optim - expect_true(all(abs(parm_errors_2_10_tc) < 0.25)) - - tcf_2_10_tc <- apply(sapply(f_2_10_tc, function(x) x$tc_fitted), 1, mean, na.rm = TRUE) - - tcf_2_10_error_model_errors <- (tcf_2_10_tc - c(0.5, 0.07)) / c(0.5, 0.07) - expect_true(all(abs(tcf_2_10_error_model_errors) < 0.5)) - - f_tc_100_1 <- suppressWarnings(mkinfit(DFOP, d_100_1[[1]], reweight.method = "tc", quiet = TRUE)) - parm_errors_100_1 <- (f_tc_100_1$bparms.optim - parms_DFOP_optim) / parms_DFOP_optim - expect_true(all(abs(parm_errors_100_1) < 0.1)) - - tcf_100_1_error_model_errors <- (f_tc_100_1$tc_fitted - c(0.5, 0.07)) / - c(0.5, 0.07) - # Even with 100 (or even 1000, not shown) replicates at each observation time - # we only get a 
precision of 15% to 30% for the error model components - expect_true(all(abs(tcf_100_1_error_model_errors) < 0.3)) - - # Parent and two metabolites - m_synth_DFOP_lin <- mkinmod(parent = list(type = "DFOP", to = "M1"), - M1 = list(type = "SFO", to = "M2"), - M2 = list(type = "SFO"), use_of_ff = "max", - quiet = TRUE) - sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) - parms_DFOP_lin <- c(k1 = 0.2, k2 = 0.02, g = 0.5, - f_parent_to_M1 = 0.5, k_M1 = 0.3, - f_M1_to_M2 = 0.7, k_M2 = 0.02) - d_synth_DFOP_lin <- mkinpredict(m_synth_DFOP_lin, - parms_DFOP_lin, - c(parent = 100, M1 = 0, M2 = 0), - sampling_times) - parms_DFOP_lin_optim = c(parent_0 = 100, parms_DFOP_lin) - - d_met_2_15 <- add_err(d_synth_DFOP_lin, - sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), - n = 15, reps = 1000, digits = 5, LOD = -Inf) - - # For a single fit, we get a relative error of less than 30% in the error - # model components - f_met_2_tc_e4 <- mkinfit(m_synth_DFOP_lin, d_met_2_15[[1]], quiet = TRUE, - reweight.method = "tc", reweight.tol = 1e-4) - parm_errors_met_2_tc_e4 <- (f_met_2_tc_e4$tc_fitted - c(0.5, 0.07)) / c(0.5, 0.07) - expect_true(all(abs(parm_errors_met_2_tc_e4) < 0.3)) - - # Doing more takes a lot of computing power - skip_on_travis() - f_met_2_15_tc_e4 <- mmkin(list(m_synth_DFOP_lin), d_met_2_15, quiet = TRUE, - reweight.method = "tc", reweight.tol = 1e-4, - cores = 14) - - parms_met_2_15_tc_e4 <- apply(sapply(f_met_2_15_tc_e4, function(x) x$bparms.optim), 1, mean) - parm_errors_met_2_15_tc_e4 <- (parms_met_2_15_tc_e4[names(parms_DFOP_lin_optim)] - - parms_DFOP_lin_optim) / parms_DFOP_lin_optim - expect_true(all(abs(parm_errors_met_2_15_tc_e4) < 0.01)) - - tcf_met_2_15_tc <- apply(sapply(f_met_2_15_tc_e4, function(x) x$tc_fitted), 1, mean, na.rm = TRUE) - - tcf_met_2_15_tc_error_model_errors <- (tcf_met_2_15_tc - c(0.5, 0.07)) / - c(0.5, 0.07) - - # Here we only get a precision < 30% for retrieving the original error model components - # from 15 datasets - expect_true(all(abs(tcf_met_2_15_tc_error_model_errors) < 0.3)) -}) diff --git a/vignettes/FOCUS_D.html b/vignettes/FOCUS_D.html index 0507740f..cb512e99 100644 --- a/vignettes/FOCUS_D.html +++ b/vignettes/FOCUS_D.html @@ -11,7 +11,7 @@ - + Example evaluation of FOCUS Example Dataset D @@ -372,7 +372,7 @@ $(document).ready(function () {

Example evaluation of FOCUS Example Dataset D

Johannes Ranke

-2019-01-31
+2019-04-04

@@ -438,16 +438,16 @@ print(FOCUS_2006_D)
fit <- mkinfit(SFO_SFO, FOCUS_2006_D, quiet = TRUE)

A plot of the fit including a residual plot for both observed variables is obtained using the plot_sep method for mkinfit objects, which shows separate graphs for all compounds and their residuals.

plot_sep(fit, lpos = c("topright", "bottomright"))
[embedded base64 image of the plot_sep() output regenerated; data not shown]
Confidence intervals for the parameter estimates are obtained using the mkinparplot function.

mkinparplot(fit)
[embedded base64 image of the mkinparplot() output regenerated; data not shown]
A comprehensive report of the results is obtained using the summary method for mkinfit objects.

summary(fit)
-## mkin version used for fitting:    0.9.47.5 
-## R version used for fitting:       3.5.2 
-## Date of fit:     Thu Jan 31 16:45:24 2019 
-## Date of summary: Thu Jan 31 16:45:24 2019 
+## mkin version used for fitting:    0.9.49.1 
+## R version used for fitting:       3.5.3 
+## Date of fit:     Thu Apr  4 17:06:46 2019 
+## Date of summary: Thu Apr  4 17:06:46 2019 
 ## 
 ## Equations:
 ## d_parent/dt = - k_parent_sink * parent - k_parent_m1 * parent
@@ -455,9 +455,10 @@ print(FOCUS_2006_D)
## ## Model predictions using solution type deSolve ## -## Fitted with method Port using 153 model solutions performed in 0.684 s +## Fitted with method using 202 model solutions performed in 0.554 s ## -## Weighting: none +## Error model: +## NULL ## ## Starting values for parameters to be optimised: ## value type @@ -465,6 +466,7 @@ print(FOCUS_2006_D)
## k_parent_sink 0.1000 deparm ## k_parent_m1 0.1001 deparm ## k_m1_sink 0.1002 deparm +## sigma 1.0000 error ## ## Starting values for the transformed parameters actually optimised: ## value lower upper @@ -472,6 +474,7 @@ print(FOCUS_2006_D) ## log_k_parent_sink -2.302585 -Inf Inf ## log_k_parent_m1 -2.301586 -Inf Inf ## log_k_m1_sink -2.300587 -Inf Inf +## sigma 1.000000 0 Inf ## ## Fixed parameter values: ## value type @@ -479,33 +482,40 @@ print(FOCUS_2006_D) ## ## Optimised, transformed parameters with symmetric confidence intervals: ## Estimate Std. Error Lower Upper -## parent_0 99.600 1.61400 96.330 102.900 -## log_k_parent_sink -3.038 0.07826 -3.197 -2.879 -## log_k_parent_m1 -2.980 0.04124 -3.064 -2.897 -## log_k_m1_sink -5.248 0.13610 -5.523 -4.972 +## parent_0 99.600 1.53000 96.490 102.700 +## log_k_parent_sink -3.038 0.07433 -3.189 -2.887 +## log_k_parent_m1 -2.980 0.03931 -3.060 -2.901 +## log_k_m1_sink -5.248 0.12980 -5.511 -4.984 +## sigma 3.046 0.34060 2.355 3.738 ## ## Parameter correlation: -## parent_0 log_k_parent_sink log_k_parent_m1 log_k_m1_sink -## parent_0 1.00000 0.6075 -0.06625 -0.1701 -## log_k_parent_sink 0.60752 1.0000 -0.08740 -0.6253 -## log_k_parent_m1 -0.06625 -0.0874 1.00000 0.4716 -## log_k_m1_sink -0.17006 -0.6253 0.47164 1.0000 -## -## Residual standard error: 3.211 on 36 degrees of freedom +## parent_0 log_k_parent_sink log_k_parent_m1 +## parent_0 1.000e+00 6.067e-01 -6.372e-02 +## log_k_parent_sink 6.067e-01 1.000e+00 -8.550e-02 +## log_k_parent_m1 -6.372e-02 -8.550e-02 1.000e+00 +## log_k_m1_sink -1.688e-01 -6.252e-01 4.731e-01 +## sigma -1.333e-06 -2.592e-08 -1.153e-06 +## log_k_m1_sink sigma +## parent_0 -1.688e-01 -1.333e-06 +## log_k_parent_sink -6.252e-01 -2.592e-08 +## log_k_parent_m1 4.731e-01 -1.153e-06 +## log_k_m1_sink 1.000e+00 -5.292e-07 +## sigma -5.292e-07 1.000e+00 ## ## Backtransformed parameters: ## Confidence intervals for internally transformed parameters are asymmetric. ## t-test (unrealistically) based on the assumption of normal distribution ## for estimators of untransformed parameters. 
## Estimate t value Pr(>t) Lower Upper -## parent_0 99.600000 61.720 2.024e-38 96.330000 1.029e+02 -## k_parent_sink 0.047920 12.780 3.050e-15 0.040890 5.616e-02 -## k_parent_m1 0.050780 24.250 3.407e-24 0.046700 5.521e-02 -## k_m1_sink 0.005261 7.349 5.758e-09 0.003992 6.933e-03 +## parent_0 99.600000 65.080 2.070e-38 96.490000 1.027e+02 +## k_parent_sink 0.047920 13.450 1.071e-15 0.041210 5.573e-02 +## k_parent_m1 0.050780 25.440 1.834e-24 0.046880 5.500e-02 +## k_m1_sink 0.005261 7.705 2.408e-09 0.004042 6.846e-03 +## sigma 3.046000 8.944 7.220e-11 2.355000 3.738e+00 ## ## Chi2 error levels in percent: ## err.min n.optim df -## All data 6.398 4 15 +## All data 6.572 5 14 ## parent 6.827 3 6 ## m1 4.490 1 9 ## @@ -522,16 +532,16 @@ print(FOCUS_2006_D) ## ## Data: ## time variable observed predicted residual -## 0 parent 99.46 99.59848 -1.385e-01 -## 0 parent 102.04 99.59848 2.442e+00 -## 1 parent 93.50 90.23787 3.262e+00 -## 1 parent 92.50 90.23787 2.262e+00 -## 3 parent 63.23 74.07320 -1.084e+01 -## 3 parent 68.99 74.07320 -5.083e+00 +## 0 parent 99.46 99.59847 -1.385e-01 +## 0 parent 102.04 99.59847 2.442e+00 +## 1 parent 93.50 90.23786 3.262e+00 +## 1 parent 92.50 90.23786 2.262e+00 +## 3 parent 63.23 74.07319 -1.084e+01 +## 3 parent 68.99 74.07319 -5.083e+00 ## 7 parent 52.32 49.91207 2.408e+00 ## 7 parent 55.13 49.91207 5.218e+00 -## 14 parent 27.27 25.01257 2.257e+00 -## 14 parent 26.64 25.01257 1.627e+00 +## 14 parent 27.27 25.01258 2.257e+00 +## 14 parent 26.64 25.01258 1.627e+00 ## 21 parent 11.50 12.53462 -1.035e+00 ## 21 parent 11.64 12.53462 -8.946e-01 ## 35 parent 2.85 3.14787 -2.979e-01 @@ -544,22 +554,22 @@ print(FOCUS_2006_D) ## 0 m1 0.00 0.00000 0.000e+00 ## 1 m1 4.84 4.80296 3.704e-02 ## 1 m1 5.64 4.80296 8.370e-01 -## 3 m1 12.91 13.02400 -1.140e-01 -## 3 m1 12.96 13.02400 -6.400e-02 -## 7 m1 22.97 25.04476 -2.075e+00 -## 7 m1 24.47 25.04476 -5.748e-01 -## 14 m1 41.69 36.69002 5.000e+00 -## 14 m1 33.21 36.69002 -3.480e+00 -## 21 m1 44.37 41.65310 2.717e+00 -## 21 m1 46.44 41.65310 4.787e+00 +## 3 m1 12.91 13.02399 -1.140e-01 +## 3 m1 12.96 13.02399 -6.399e-02 +## 7 m1 22.97 25.04475 -2.075e+00 +## 7 m1 24.47 25.04475 -5.748e-01 +## 14 m1 41.69 36.69001 5.000e+00 +## 14 m1 33.21 36.69001 -3.480e+00 +## 21 m1 44.37 41.65309 2.717e+00 +## 21 m1 46.44 41.65309 4.787e+00 ## 35 m1 41.22 43.31312 -2.093e+00 ## 35 m1 37.95 43.31312 -5.363e+00 ## 50 m1 41.19 41.21831 -2.831e-02 ## 50 m1 40.01 41.21831 -1.208e+00 -## 75 m1 40.09 36.44704 3.643e+00 -## 75 m1 33.85 36.44704 -2.597e+00 -## 100 m1 31.04 31.98163 -9.416e-01 -## 100 m1 33.13 31.98163 1.148e+00 +## 75 m1 40.09 36.44703 3.643e+00 +## 75 m1 33.85 36.44703 -2.597e+00 +## 100 m1 31.04 31.98162 -9.416e-01 +## 100 m1 33.13 31.98162 1.148e+00 ## 120 m1 25.15 28.78984 -3.640e+00 ## 120 m1 33.31 28.78984 4.520e+00 diff --git a/vignettes/FOCUS_L.html b/vignettes/FOCUS_L.html index b26a9e43..5e7e7c74 100644 --- a/vignettes/FOCUS_L.html +++ b/vignettes/FOCUS_L.html @@ -11,22 +11,1275 @@ - + Example evaluation of FOCUS Laboratory Data L1 to L3 - + - - - - - - - - - - + + + + + + + + + + @@ -105,17 +1364,77 @@ button.code-folding-btn:focus {
[remainder of the diff for the regenerated vignettes/FOCUS_L.html not shown]
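
Not part of the patch, for illustration: a minimal usage sketch of the error_model interface introduced in this commit, assuming the patch is applied. It uses the FOCUS_2006_D dataset shipped with mkin and the SFO_SFO model from the FOCUS_D vignette; all function and argument names used below appear in the changes above.

# Minimal usage sketch (assumes mkin with this patch applied)
library(mkin)

# Parent compound degrading to a metabolite m1, as in the FOCUS_D vignette
SFO_SFO <- mkinmod(parent = mkinsub("SFO", "m1"),
                   m1 = mkinsub("SFO"), quiet = TRUE)

# Constant variance (the default): a single sigma for all observations
f_const <- mkinfit(SFO_SFO, FOCUS_2006_D, error_model = "const", quiet = TRUE)

# Two-component error model, fitted directly by maximising the likelihood
# instead of the removed IRLS loop; the standard deviation of an observation
# is modelled as sqrt(sigma_low^2 + rsd_high^2 * predicted^2)
f_tc <- mkinfit(SFO_SFO, FOCUS_2006_D, error_model = "tc", quiet = TRUE)

# Error model parameters are estimated and reported together with the
# kinetic parameters
summary(f_tc)
f_tc$errparms.optim

Because the error model parameters are now part of the single likelihood optimised by nlminb, the former reweight.method, reweight.tol and reweight.max.iter arguments and the FME dependency are no longer needed.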