diff options
Diffstat (limited to 'R/mkinfit.R')
-rw-r--r-- | R/mkinfit.R | 1801 |
1 files changed, 897 insertions, 904 deletions
diff --git a/R/mkinfit.R b/R/mkinfit.R index d182f5d0..17fd59d0 100644 --- a/R/mkinfit.R +++ b/R/mkinfit.R @@ -1,904 +1,897 @@ -# Copyright (C) 2010-2019 Johannes Ranke
-# Portions of this code are copyright (C) 2013 Eurofins Regulatory AG
-# Contact: jranke@uni-bremen.de
-
-# This file is part of the R package mkin
-
-# mkin is free software: you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the Free Software
-# Foundation, either version 3 of the License, or (at your option) any later
-# version.
-
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-# details.
-
-# You should have received a copy of the GNU General Public License along with
-# this program. If not, see <http://www.gnu.org/licenses/>
-if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value"))
-
-mkinfit <- function(mkinmod, observed,
- parms.ini = "auto",
- state.ini = "auto",
- err.ini = "auto",
- fixed_parms = NULL,
- fixed_initials = names(mkinmod$diffs)[-1],
- from_max_mean = FALSE,
- solution_type = c("auto", "analytical", "eigen", "deSolve"),
- method.ode = "lsoda",
- use_compiled = "auto",
- control = list(eval.max = 300, iter.max = 200),
- transform_rates = TRUE,
- transform_fractions = TRUE,
- quiet = FALSE,
- atol = 1e-8, rtol = 1e-10, n.outtimes = 100,
- error_model = c("const", "obs", "tc"),
- error_model_algorithm = c("auto", "d_3", "direct", "twostep", "threestep", "fourstep", "IRLS", "OLS"),
- reweight.tol = 1e-8, reweight.max.iter = 10,
- trace_parms = FALSE,
- ...)
-{
- # Check mkinmod and generate a model for the variable whith the highest value
- # if a suitable string is given
- parent_models_available = c("SFO", "FOMC", "DFOP", "HS", "SFORB", "IORE", "logistic")
- if (class(mkinmod) != "mkinmod") {
- presumed_parent_name = observed[which.max(observed$value), "name"]
- if (mkinmod[[1]] %in% parent_models_available) {
- speclist <- list(list(type = mkinmod, sink = TRUE))
- names(speclist) <- presumed_parent_name
- mkinmod <- mkinmod(speclist = speclist)
- } else {
- stop("Argument mkinmod must be of class mkinmod or a string containing one of\n ",
- paste(parent_models_available, collapse = ", "))
- }
- }
-
- # Get the names of the state variables in the model
- mod_vars <- names(mkinmod$diffs)
-
- # Get the names of observed variables
- obs_vars <- names(mkinmod$spec)
-
- # Subset observed data with names of observed data in the model and remove NA values
- observed <- subset(observed, name %in% obs_vars)
- observed <- subset(observed, !is.na(value))
-
- # Also remove zero values to avoid instabilities (e.g. of the 'tc' error model)
- if (any(observed$value == 0)) {
- warning("Observations with value of zero were removed from the data")
- observed <- subset(observed, value != 0)
- }
-
- # Obtain data for decline from maximum mean value if requested
- if (from_max_mean) {
- # This is only used for simple decline models
- if (length(obs_vars) > 1)
- stop("Decline from maximum is only implemented for models with a single observed variable")
-
- means <- aggregate(value ~ time, data = observed, mean, na.rm=TRUE)
- t_of_max <- means[which.max(means$value), "time"]
- observed <- subset(observed, time >= t_of_max)
- observed$time <- observed$time - t_of_max
- }
-
- # Number observations used for fitting
- n_observed <- nrow(observed)
-
- # Define starting values for parameters where not specified by the user
- if (parms.ini[[1]] == "auto") parms.ini = vector()
-
- # Warn for inital parameter specifications that are not in the model
- wrongpar.names <- setdiff(names(parms.ini), mkinmod$parms)
- if (length(wrongpar.names) > 0) {
- warning("Initial parameter(s) ", paste(wrongpar.names, collapse = ", "),
- " not used in the model")
- parms.ini <- parms.ini[setdiff(names(parms.ini), wrongpar.names)]
- }
-
- # Warn that the sum of formation fractions may exceed one if they are not
- # fitted in the transformed way
- if (mkinmod$use_of_ff == "max" & transform_fractions == FALSE) {
- warning("The sum of formation fractions may exceed one if you do not use ",
- "transform_fractions = TRUE." )
- for (box in mod_vars) {
- # Stop if formation fractions are not transformed and we have no sink
- if (mkinmod$spec[[box]]$sink == FALSE) {
- stop("If formation fractions are not transformed during the fitting, ",
- "it is not supported to turn off pathways to sink.\n ",
- "Consider turning on the transformation of formation fractions or ",
- "setting up a model with use_of_ff = 'min'.\n")
- }
- }
- }
-
- # Do not allow fixing formation fractions if we are using the ilr transformation,
- # this is not supported
- if (transform_fractions == TRUE && length(fixed_parms) > 0) {
- if (any(grepl("^f_", fixed_parms))) {
- stop("Fixing formation fractions is not supported when using the ilr ",
- "transformation.")
- }
- }
-
- # Set initial parameter values, including a small increment (salt)
- # to avoid linear dependencies (singular matrix) in Eigenvalue based solutions
- k_salt = 0
- defaultpar.names <- setdiff(mkinmod$parms, names(parms.ini))
- for (parmname in defaultpar.names) {
- # Default values for rate constants, depending on the parameterisation
- if (grepl("^k", parmname)) {
- parms.ini[parmname] = 0.1 + k_salt
- k_salt = k_salt + 1e-4
- }
- # Default values for rate constants for reversible binding
- if (grepl("free_bound$", parmname)) parms.ini[parmname] = 0.1
- if (grepl("bound_free$", parmname)) parms.ini[parmname] = 0.02
- # Default values for IORE exponents
- if (grepl("^N", parmname)) parms.ini[parmname] = 1.1
- # Default values for the FOMC, DFOP and HS models
- if (parmname == "alpha") parms.ini[parmname] = 1
- if (parmname == "beta") parms.ini[parmname] = 10
- if (parmname == "k1") parms.ini[parmname] = 0.1
- if (parmname == "k2") parms.ini[parmname] = 0.01
- if (parmname == "tb") parms.ini[parmname] = 5
- if (parmname == "g") parms.ini[parmname] = 0.5
- if (parmname == "kmax") parms.ini[parmname] = 0.1
- if (parmname == "k0") parms.ini[parmname] = 0.0001
- if (parmname == "r") parms.ini[parmname] = 0.2
- }
- # Default values for formation fractions in case they are present
- for (box in mod_vars) {
- f_names <- mkinmod$parms[grep(paste0("^f_", box), mkinmod$parms)]
- if (length(f_names) > 0) {
- # We need to differentiate between default and specified fractions
- # and set the unspecified to 1 - sum(specified)/n_unspecified
- f_default_names <- intersect(f_names, defaultpar.names)
- f_specified_names <- setdiff(f_names, defaultpar.names)
- sum_f_specified = sum(parms.ini[f_specified_names])
- if (sum_f_specified > 1) {
- stop("Starting values for the formation fractions originating from ",
- box, " sum up to more than 1.")
- }
- if (mkinmod$spec[[box]]$sink) n_unspecified = length(f_default_names) + 1
- else {
- n_unspecified = length(f_default_names)
- }
- parms.ini[f_default_names] <- (1 - sum_f_specified) / n_unspecified
- }
- }
-
- # Set default for state.ini if appropriate
- parent_name = names(mkinmod$spec)[[1]]
- if (state.ini[1] == "auto") {
- parent_time_0 = subset(observed, time == 0 & name == parent_name)$value
- parent_time_0_mean = mean(parent_time_0, na.rm = TRUE)
- if (is.na(parent_time_0_mean)) {
- state.ini = c(100, rep(0, length(mkinmod$diffs) - 1))
- } else {
- state.ini = c(parent_time_0_mean, rep(0, length(mkinmod$diffs) - 1))
- }
- }
-
- # Name the inital state variable values if they are not named yet
- if(is.null(names(state.ini))) names(state.ini) <- mod_vars
-
- # Transform initial parameter values for fitting
- transparms.ini <- transform_odeparms(parms.ini, mkinmod,
- transform_rates = transform_rates,
- transform_fractions = transform_fractions)
-
- # Parameters to be optimised:
- # Kinetic parameters in parms.ini whose names are not in fixed_parms
- parms.fixed <- parms.ini[fixed_parms]
- parms.optim <- parms.ini[setdiff(names(parms.ini), fixed_parms)]
-
- transparms.fixed <- transform_odeparms(parms.fixed, mkinmod,
- transform_rates = transform_rates,
- transform_fractions = transform_fractions)
- transparms.optim <- transform_odeparms(parms.optim, mkinmod,
- transform_rates = transform_rates,
- transform_fractions = transform_fractions)
-
- # Inital state variables in state.ini whose names are not in fixed_initials
- state.ini.fixed <- state.ini[fixed_initials]
- state.ini.optim <- state.ini[setdiff(names(state.ini), fixed_initials)]
-
- # Preserve names of state variables before renaming initial state variable
- # parameters
- state.ini.optim.boxnames <- names(state.ini.optim)
- state.ini.fixed.boxnames <- names(state.ini.fixed)
- if(length(state.ini.optim) > 0) {
- names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_")
- }
- if(length(state.ini.fixed) > 0) {
- names(state.ini.fixed) <- paste(names(state.ini.fixed), "0", sep="_")
- }
-
- # Decide if the solution of the model can be based on a simple analytical
- # formula, the spectral decomposition of the matrix (fundamental system)
- # or a numeric ode solver from the deSolve package
- # Prefer deSolve over eigen if a compiled model is present and use_compiled
- # is not set to FALSE
- solution_type = match.arg(solution_type)
- if (solution_type == "analytical" && length(mkinmod$spec) > 1)
- stop("Analytical solution not implemented for models with metabolites.")
- if (solution_type == "eigen" && !is.matrix(mkinmod$coefmat))
- stop("Eigenvalue based solution not possible, coefficient matrix not present.")
- if (solution_type == "auto") {
- if (length(mkinmod$spec) == 1) {
- solution_type = "analytical"
- } else {
- if (!is.null(mkinmod$cf) & use_compiled[1] != FALSE) {
- solution_type = "deSolve"
- } else {
- if (is.matrix(mkinmod$coefmat)) {
- solution_type = "eigen"
- if (max(observed$value, na.rm = TRUE) < 0.1) {
- stop("The combination of small observed values (all < 0.1) and solution_type = eigen is error-prone")
- }
- } else {
- solution_type = "deSolve"
- }
- }
- }
- }
-
- # Get the error model and the algorithm for fitting
- err_mod <- match.arg(error_model)
- error_model_algorithm = match.arg(error_model_algorithm)
- if (error_model_algorithm == "OLS") {
- if (err_mod != "const") stop("OLS is only appropriate for constant variance")
- }
- if (error_model_algorithm == "auto") {
- error_model_algorithm = switch(err_mod,
- const = "OLS", obs = "d_3", tc = "d_3")
- }
- errparm_names <- switch(err_mod,
- "const" = "sigma",
- "obs" = paste0("sigma_", obs_vars),
- "tc" = c("sigma_low", "rsd_high"))
- errparm_names_optim <- if (error_model_algorithm == "OLS") NULL else errparm_names
-
- # Define starting values for the error model
- if (err.ini[1] != "auto") {
- if (!identical(names(err.ini), errparm_names)) {
- stop("Please supply initial values for error model components ", paste(errparm_names, collapse = ", "))
- } else {
- errparms = err.ini
- }
- } else {
- if (err_mod == "const") {
- errparms = 3
- }
- if (err_mod == "obs") {
- errparms = rep(3, length(obs_vars))
- }
- if (err_mod == "tc") {
- errparms <- c(sigma_low = 0.1, rsd_high = 0.1)
- }
- names(errparms) <- errparm_names
- }
- if (error_model_algorithm == "OLS") {
- errparms_optim <- NULL
- } else {
- errparms_optim <- errparms
- }
-
- # Define outtimes for model solution.
- # Include time points at which observed data are available
- outtimes = sort(unique(c(observed$time, seq(min(observed$time),
- max(observed$time),
- length.out = n.outtimes))))
-
- # Define the objective function for optimisation, including (back)transformations
- cost_function <- function(P, trans = TRUE, OLS = FALSE, fixed_degparms = FALSE, fixed_errparms = FALSE, update_data = TRUE, ...)
- {
- assign("calls", calls + 1, inherits = TRUE) # Increase the model solution counter
-
- # Trace parameter values if requested and if we are actually optimising
- if(trace_parms & update_data) cat(P, "\n")
-
- # Determine local parameter values for the cost estimation
- if (is.numeric(fixed_degparms)) {
- cost_degparms <- fixed_degparms
- cost_errparms <- P
- degparms_fixed = TRUE
- } else {
- degparms_fixed = FALSE
- }
-
- if (is.numeric(fixed_errparms)) {
- cost_degparms <- P
- cost_errparms <- fixed_errparms
- errparms_fixed = TRUE
- } else {
- errparms_fixed = FALSE
- }
-
- if (OLS) {
- cost_degparms <- P
- cost_errparms <- numeric(0)
- }
-
- if (!OLS & !degparms_fixed & !errparms_fixed) {
- cost_degparms <- P[1:(length(P) - length(errparms))]
- cost_errparms <- P[(length(cost_degparms) + 1):length(P)]
- }
-
- # Initial states for t0
- if(length(state.ini.optim) > 0) {
- odeini <- c(cost_degparms[1:length(state.ini.optim)], state.ini.fixed)
- names(odeini) <- c(state.ini.optim.boxnames, state.ini.fixed.boxnames)
- } else {
- odeini <- state.ini.fixed
- names(odeini) <- state.ini.fixed.boxnames
- }
-
- odeparms.optim <- cost_degparms[(length(state.ini.optim) + 1):length(cost_degparms)]
-
- if (trans == TRUE) {
- odeparms <- c(odeparms.optim, transparms.fixed)
- parms <- backtransform_odeparms(odeparms, mkinmod,
- transform_rates = transform_rates,
- transform_fractions = transform_fractions)
- } else {
- parms <- c(odeparms.optim, parms.fixed)
- }
-
- # Solve the system with current parameter values
- out <- mkinpredict(mkinmod, parms,
- odeini, outtimes,
- solution_type = solution_type,
- use_compiled = use_compiled,
- method.ode = method.ode,
- atol = atol, rtol = rtol, ...)
-
- out_long <- mkin_wide_to_long(out, time = "time")
-
- if (err_mod == "const") {
- observed$std <- if (OLS) NA else cost_errparms["sigma"]
- }
- if (err_mod == "obs") {
- std_names <- paste0("sigma_", observed$name)
- observed$std <- cost_errparms[std_names]
- }
- if (err_mod == "tc") {
- tmp <- merge(observed, out_long, by = c("time", "name"))
- tmp$name <- ordered(tmp$name, levels = obs_vars)
- tmp <- tmp[order(tmp$name, tmp$time), ]
- observed$std <- sqrt(cost_errparms["sigma_low"]^2 + tmp$value.y^2 * cost_errparms["rsd_high"]^2)
- }
-
- cost_data <- merge(observed[c("name", "time", "value", "std")], out_long,
- by = c("name", "time"), suffixes = c(".observed", ".predicted"))
-
- if (OLS) {
- # Cost is the sum of squared residuals
- cost <- with(cost_data, sum((value.observed - value.predicted)^2))
- } else {
- # Cost is the negative log-likelihood
- cost <- - with(cost_data,
- sum(dnorm(x = value.observed, mean = value.predicted, sd = std, log = TRUE)))
- }
-
- # We update the current cost and data during the optimisation, not
- # during hessian calculations
- if (update_data) {
-
- assign("out_predicted", out_long, inherits = TRUE)
- assign("current_data", cost_data, inherits = TRUE)
-
- if (cost < cost.current) {
- assign("cost.current", cost, inherits = TRUE)
- if (!quiet) cat(ifelse(OLS, "Sum of squared residuals", "Negative log-likelihood"),
- " at call ", calls, ": ", cost.current, "\n", sep = "")
- }
- }
- return(cost)
- }
-
- names_optim <- c(names(state.ini.optim),
- names(transparms.optim),
- errparm_names_optim)
- n_optim <- length(names_optim)
-
- # Define lower and upper bounds other than -Inf and Inf for parameters
- # for which no internal transformation is requested in the call to mkinfit
- # and for optimised error model parameters
- lower <- rep(-Inf, n_optim)
- upper <- rep(Inf, n_optim)
- names(lower) <- names(upper) <- names_optim
-
- # IORE exponents are not transformed, but need a lower bound
- index_N <- grep("^N", names(lower))
- lower[index_N] <- 0
-
- if (!transform_rates) {
- index_k <- grep("^k_", names(lower))
- lower[index_k] <- 0
- index_k__iore <- grep("^k__iore_", names(lower))
- lower[index_k__iore] <- 0
- other_rate_parms <- intersect(c("alpha", "beta", "k1", "k2", "tb", "r"), names(lower))
- lower[other_rate_parms] <- 0
- }
-
- if (!transform_fractions) {
- index_f <- grep("^f_", names(upper))
- lower[index_f] <- 0
- upper[index_f] <- 1
- other_fraction_parms <- intersect(c("g"), names(upper))
- lower[other_fraction_parms] <- 0
- upper[other_fraction_parms] <- 1
- }
-
- if (err_mod == "const") {
- if (error_model_algorithm != "OLS") {
- lower["sigma"] <- 0
- }
- }
- if (err_mod == "obs") {
- index_sigma <- grep("^sigma_", names(lower))
- lower[index_sigma] <- 0
- }
- if (err_mod == "tc") {
- lower["sigma_low"] <- 0
- lower["rsd_high"] <- 0
- }
-
- # Counter for cost function evaluations
- calls = 0
- cost.current <- Inf
- out_predicted <- NA
- current_data <- NA
-
- # Show parameter names if tracing is requested
- if(trace_parms) cat(names_optim, "\n")
-
- # browser()
-
- # Do the fit and take the time until the hessians are calculated
- fit_time <- system.time({
- degparms <- c(state.ini.optim, transparms.optim)
- n_degparms <- length(degparms)
- degparms_index <- seq(1, n_degparms)
- errparms_index <- seq(n_degparms + 1, length.out = length(errparms))
-
- if (error_model_algorithm == "d_3") {
- if (!quiet) message("Directly optimising the complete model")
- parms.start <- c(degparms, errparms)
- fit_direct <- nlminb(parms.start, cost_function,
- lower = lower[names(parms.start)],
- upper = upper[names(parms.start)],
- control = control, ...)
- fit_direct$logLik <- - cost.current
- if (error_model_algorithm == "direct") {
- degparms <- fit_direct$par[degparms_index]
- errparms <- fit_direct$par[errparms_index]
- } else {
- cost.current <- Inf # reset to avoid conflict with the OLS step
- }
- }
- if (error_model_algorithm != "direct") {
- if (!quiet) message("Ordinary least squares optimisation")
- fit <- nlminb(degparms, cost_function, control = control,
- lower = lower[names(degparms)],
- upper = upper[names(degparms)], OLS = TRUE, ...)
- degparms <- fit$par
-
- # Get the maximum likelihood estimate for sigma at the optimum parameter values
- current_data$residual <- current_data$value.observed - current_data$value.predicted
- sigma_mle <- sqrt(sum(current_data$residual^2)/nrow(current_data))
-
- # Use that estimate for the constant variance, or as first guess if err_mod = "obs"
- if (err_mod != "tc") {
- errparms[names(errparms)] <- sigma_mle
- }
- fit$par <- c(fit$par, errparms)
-
- cost.current <- cost_function(c(degparms, errparms), OLS = FALSE)
- fit$logLik <- - cost.current
- }
- if (error_model_algorithm %in% c("threestep", "fourstep", "d_3")) {
- if (!quiet) message("Optimising the error model")
- fit <- nlminb(errparms, cost_function, control = control,
- lower = lower[names(errparms)],
- upper = upper[names(errparms)],
- fixed_degparms = degparms, ...)
- errparms <- fit$par
- }
- if (error_model_algorithm == "fourstep") {
- if (!quiet) message("Optimising the degradation model")
- fit <- nlminb(degparms, cost_function, control = control,
- lower = lower[names(degparms)],
- upper = upper[names(degparms)],
- fixed_errparms = errparms, ...)
- degparms <- fit$par
- }
- if (error_model_algorithm %in%
- c("direct", "twostep", "threestep", "fourstep", "d_3")) {
- if (!quiet) message("Optimising the complete model")
- parms.start <- c(degparms, errparms)
- fit <- nlminb(parms.start, cost_function,
- lower = lower[names(parms.start)],
- upper = upper[names(parms.start)],
- control = control, ...)
- degparms <- fit$par[degparms_index]
- errparms <- fit$par[errparms_index]
- fit$logLik <- - cost.current
-
- if (error_model_algorithm == "d_3") {
- d_3_messages = c(
- same = "Direct fitting and three-step fitting yield approximately the same likelihood",
- threestep = "Three-step fitting yielded a higher likelihood than direct fitting",
- direct = "Direct fitting yielded a higher likelihood than three-step fitting")
- rel_diff <- abs((fit_direct$logLik - fit$logLik))/-mean(c(fit_direct$logLik, fit$logLik))
- if (rel_diff < 0.0001) {
- if (!quiet) message(d_3_messages["same"])
- fit$d_3_message <- d_3_messages["same"]
- } else {
- if (fit$logLik > fit_direct$logLik) {
- if (!quiet) message(d_3_messages["threestep"])
- fit$d_3_message <- d_3_messages["threestep"]
- } else {
- if (!quiet) message(d_3_messages["direct"])
- fit <- fit_direct
- fit$d_3_message <- d_3_messages["direct"]
- }
- }
- }
- }
- if (err_mod != "const" & error_model_algorithm == "IRLS") {
- reweight.diff <- 1
- n.iter <- 0
- errparms_last <- errparms
-
- while (reweight.diff > reweight.tol &
- n.iter < reweight.max.iter) {
-
- if (!quiet) message("Optimising the error model")
- fit <- nlminb(errparms, cost_function, control = control,
- lower = lower[names(errparms)],
- upper = upper[names(errparms)],
- fixed_degparms = degparms, ...)
- errparms <- fit$par
-
- if (!quiet) message("Optimising the degradation model")
- fit <- nlminb(degparms, cost_function, control = control,
- lower = lower[names(degparms)],
- upper = upper[names(degparms)],
- fixed_errparms = errparms, ...)
- degparms <- fit$par
-
- reweight.diff <- dist(rbind(errparms, errparms_last))
- errparms_last <- errparms
-
- fit$par <- c(fit$par, errparms)
- cost.current <- cost_function(c(degparms, errparms), OLS = FALSE)
- fit$logLik <- - cost.current
- }
- }
-
- fit$hessian <- try(numDeriv::hessian(cost_function, c(degparms, errparms), OLS = FALSE,
- update_data = FALSE), silent = TRUE)
-
- # Backtransform parameters
- bparms.optim = backtransform_odeparms(fit$par, mkinmod,
- transform_rates = transform_rates,
- transform_fractions = transform_fractions)
- bparms.fixed = c(state.ini.fixed, parms.fixed)
- bparms.all = c(bparms.optim, parms.fixed)
-
- fit$hessian_notrans <- try(numDeriv::hessian(cost_function, c(bparms.all, errparms),
- OLS = FALSE, trans = FALSE, update_data = FALSE), silent = TRUE)
- })
-
- fit$error_model_algorithm <- error_model_algorithm
-
- if (fit$convergence != 0) {
- fit$warning = paste0("Optimisation did not converge:\n", fit$message)
- warning(fit$warning)
- } else {
- if(!quiet) message("Optimisation successfully terminated.\n")
- }
-
- # We need to return some more data for summary and plotting
- fit$solution_type <- solution_type
- fit$transform_rates <- transform_rates
- fit$transform_fractions <- transform_fractions
- fit$reweight.tol <- reweight.tol
- fit$reweight.max.iter <- reweight.max.iter
- fit$control <- control
- fit$calls <- calls
- fit$time <- fit_time
-
- # We also need the model for summary and plotting
- fit$mkinmod <- mkinmod
-
- # We need data and predictions for summary and plotting
- fit$observed <- observed
- fit$obs_vars <- obs_vars
- fit$predicted <- out_predicted
-
- # Residual sum of squares as a function of the fitted parameters
- fit$rss <- function(P) cost_function(P, OLS = TRUE, update_data = FALSE)
-
- # Log-likelihood with possibility to fix degparms or errparms
- fit$ll <- function(P, fixed_degparms = FALSE, fixed_errparms = FALSE) {
- - cost_function(P, fixed_degparms = fixed_degparms,
- fixed_errparms = fixed_errparms, OLS = FALSE, update_data = FALSE)
- }
-
- # Collect initial parameter values in three dataframes
- fit$start <- data.frame(value = c(state.ini.optim,
- parms.optim, errparms_optim))
- fit$start$type = c(rep("state", length(state.ini.optim)),
- rep("deparm", length(parms.optim)),
- rep("error", length(errparms_optim)))
-
- fit$start_transformed = data.frame(
- value = c(state.ini.optim, transparms.optim, errparms_optim),
- lower = lower,
- upper = upper)
-
- fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed))
- fit$fixed$type = c(rep("state", length(state.ini.fixed)),
- rep("deparm", length(parms.fixed)))
-
- # Sort observed, predicted and residuals
- current_data$name <- ordered(current_data$name, levels = obs_vars)
-
- ordered_data <- current_data[order(current_data$name, current_data$time), ]
-
- fit$data <- data.frame(time = ordered_data$time,
- variable = ordered_data$name,
- observed = ordered_data$value.observed,
- predicted = ordered_data$value.predicted)
-
- fit$data$residual <- fit$data$observed - fit$data$predicted
-
- fit$atol <- atol
- fit$rtol <- rtol
- fit$err_mod <- err_mod
-
- # Return different sets of backtransformed parameters for summary and plotting
- fit$bparms.optim <- bparms.optim
- fit$bparms.fixed <- bparms.fixed
-
- # Return ode and state parameters for further fitting
- fit$bparms.ode <- bparms.all[mkinmod$parms]
- fit$bparms.state <- c(bparms.all[setdiff(names(bparms.all), names(fit$bparms.ode))],
- state.ini.fixed)
- names(fit$bparms.state) <- gsub("_0$", "", names(fit$bparms.state))
-
- fit$errparms <- errparms
- fit$df.residual <- n_observed - length(c(degparms, errparms))
-
- fit$date <- date()
- fit$version <- as.character(utils::packageVersion("mkin"))
- fit$Rversion <- paste(R.version$major, R.version$minor, sep=".")
-
- class(fit) <- c("mkinfit", "modFit")
- return(fit)
-}
-
-summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, alpha = 0.05, ...) {
- param <- object$par
- pnames <- names(param)
- bpnames <- names(object$bparms.optim)
- epnames <- names(object$errparms)
- p <- length(param)
- mod_vars <- names(object$mkinmod$diffs)
- covar <- try(solve(object$hessian), silent = TRUE)
- covar_notrans <- try(solve(object$hessian_notrans), silent = TRUE)
- rdf <- object$df.residual
-
- if (!is.numeric(covar) | is.na(covar[1])) {
- covar <- NULL
- se <- lci <- uci <- rep(NA, p)
- } else {
- rownames(covar) <- colnames(covar) <- pnames
- se <- sqrt(diag(covar))
- lci <- param + qt(alpha/2, rdf) * se
- uci <- param + qt(1-alpha/2, rdf) * se
- }
-
- beparms.optim <- c(object$bparms.optim, object$par[epnames])
- if (!is.numeric(covar_notrans) | is.na(covar_notrans[1])) {
- covar_notrans <- NULL
- se_notrans <- tval <- pval <- rep(NA, p)
- } else {
- rownames(covar_notrans) <- colnames(covar_notrans) <- c(bpnames, epnames)
- se_notrans <- sqrt(diag(covar_notrans))
- tval <- beparms.optim / se_notrans
- pval <- pt(abs(tval), rdf, lower.tail = FALSE)
- }
-
- names(se) <- pnames
-
- param <- cbind(param, se, lci, uci)
- dimnames(param) <- list(pnames, c("Estimate", "Std. Error", "Lower", "Upper"))
-
- bparam <- cbind(Estimate = beparms.optim, se_notrans,
- "t value" = tval, "Pr(>t)" = pval, Lower = NA, Upper = NA)
-
- # Transform boundaries of CI for one parameter at a time,
- # with the exception of sets of formation fractions (single fractions are OK).
- f_names_skip <- character(0)
- for (box in mod_vars) { # Figure out sets of fractions to skip
- f_names <- grep(paste("^f", box, sep = "_"), pnames, value = TRUE)
- n_paths <- length(f_names)
- if (n_paths > 1) f_names_skip <- c(f_names_skip, f_names)
- }
-
- for (pname in pnames) {
- if (!pname %in% f_names_skip) {
- par.lower <- param[pname, "Lower"]
- par.upper <- param[pname, "Upper"]
- names(par.lower) <- names(par.upper) <- pname
- bpl <- backtransform_odeparms(par.lower, object$mkinmod,
- object$transform_rates,
- object$transform_fractions)
- bpu <- backtransform_odeparms(par.upper, object$mkinmod,
- object$transform_rates,
- object$transform_fractions)
- bparam[names(bpl), "Lower"] <- bpl
- bparam[names(bpu), "Upper"] <- bpu
- }
- }
- bparam[epnames, c("Lower", "Upper")] <- param[epnames, c("Lower", "Upper")]
-
- ans <- list(
- version = as.character(utils::packageVersion("mkin")),
- Rversion = paste(R.version$major, R.version$minor, sep="."),
- date.fit = object$date,
- date.summary = date(),
- solution_type = object$solution_type,
- warning = object$warning,
- use_of_ff = object$mkinmod$use_of_ff,
- error_model_algorithm = object$error_model_algorithm,
- df = c(p, rdf),
- covar = covar,
- covar_notrans = covar_notrans,
- err_mod = object$err_mod,
- niter = object$iterations,
- calls = object$calls,
- time = object$time,
- par = param,
- bpar = bparam)
-
- if (!is.null(object$version)) {
- ans$fit_version <- object$version
- ans$fit_Rversion <- object$Rversion
- }
-
- ans$diffs <- object$mkinmod$diffs
- if(data) ans$data <- object$data
- ans$start <- object$start
- ans$start_transformed <- object$start_transformed
-
- ans$fixed <- object$fixed
-
- ans$errmin <- mkinerrmin(object, alpha = 0.05)
-
- if (object$calls > 0) {
- if (!is.null(ans$covar)){
- Corr <- cov2cor(ans$covar)
- rownames(Corr) <- colnames(Corr) <- rownames(ans$par)
- ans$Corr <- Corr
- } else {
- warning("Could not calculate correlation; no covariance matrix")
- }
- }
-
- ans$bparms.ode <- object$bparms.ode
- ep <- endpoints(object)
- if (length(ep$ff) != 0)
- ans$ff <- ep$ff
- if (distimes) ans$distimes <- ep$distimes
- if (length(ep$SFORB) != 0) ans$SFORB <- ep$SFORB
- if (!is.null(object$d_3_message)) ans$d_3_message <- object$d_3_message
- class(ans) <- c("summary.mkinfit", "summary.modFit")
- return(ans)
-}
-
-# Expanded from print.summary.modFit
-print.summary.mkinfit <- function(x, digits = max(3, getOption("digits") - 3), ...) {
- if (is.null(x$fit_version)) {
- cat("mkin version: ", x$version, "\n")
- cat("R version: ", x$Rversion, "\n")
- } else {
- cat("mkin version used for fitting: ", x$fit_version, "\n")
- cat("R version used for fitting: ", x$fit_Rversion, "\n")
- }
-
- cat("Date of fit: ", x$date.fit, "\n")
- cat("Date of summary:", x$date.summary, "\n")
-
- if (!is.null(x$warning)) cat("\n\nWarning:", x$warning, "\n\n")
-
- cat("\nEquations:\n")
- nice_diffs <- gsub("^(d.*) =", "\\1/dt =", x[["diffs"]])
- writeLines(strwrap(nice_diffs, exdent = 11))
- df <- x$df
- rdf <- df[2]
-
- cat("\nModel predictions using solution type", x$solution_type, "\n")
-
- cat("\nFitted using", x$calls, "model solutions performed in", x$time[["elapsed"]], "s\n")
-
- if (!is.null(x$err_mod)) {
- cat("\nError model: ")
- cat(switch(x$err_mod,
- const = "Constant variance",
- obs = "Variance unique to each observed variable",
- tc = "Two-component variance function"), "\n")
-
- cat("\nError model algorithm:", x$error_model_algorithm, "\n")
- if (!is.null(x$d_3_message)) cat(x$d_3_message, "\n")
- }
-
- cat("\nStarting values for parameters to be optimised:\n")
- print(x$start)
-
- cat("\nStarting values for the transformed parameters actually optimised:\n")
- print(x$start_transformed)
-
- cat("\nFixed parameter values:\n")
- if(length(x$fixed$value) == 0) cat("None\n")
- else print(x$fixed)
-
- cat("\nOptimised, transformed parameters with symmetric confidence intervals:\n")
- print(signif(x$par, digits = digits))
-
- if (x$calls > 0) {
- cat("\nParameter correlation:\n")
- if (!is.null(x$covar)){
- print(x$Corr, digits = digits, ...)
- } else {
- cat("No covariance matrix")
- }
- }
-
- cat("\nBacktransformed parameters:\n")
- cat("Confidence intervals for internally transformed parameters are asymmetric.\n")
- if ((x$version) < "0.9-36") {
- cat("To get the usual (questionable) t-test, upgrade mkin and repeat the fit.\n")
- print(signif(x$bpar, digits = digits))
- } else {
- cat("t-test (unrealistically) based on the assumption of normal distribution\n")
- cat("for estimators of untransformed parameters.\n")
- print(signif(x$bpar[, c(1, 3, 4, 5, 6)], digits = digits))
- }
-
- cat("\nFOCUS Chi2 error levels in percent:\n")
- x$errmin$err.min <- 100 * x$errmin$err.min
- print(x$errmin, digits=digits,...)
-
- printSFORB <- !is.null(x$SFORB)
- if(printSFORB){
- cat("\nEstimated Eigenvalues of SFORB model(s):\n")
- print(x$SFORB, digits=digits,...)
- }
-
- printff <- !is.null(x$ff)
- if(printff){
- cat("\nResulting formation fractions:\n")
- print(data.frame(ff = x$ff), digits=digits,...)
- }
-
- printdistimes <- !is.null(x$distimes)
- if(printdistimes){
- cat("\nEstimated disappearance times:\n")
- print(x$distimes, digits=digits,...)
- }
-
- printdata <- !is.null(x$data)
- if (printdata){
- cat("\nData:\n")
- print(format(x$data, digits = digits, ...), row.names = FALSE)
- }
-
- invisible(x)
-}
-# vim: set ts=2 sw=2 expandtab:
+if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value")) + +#' Fit a kinetic model to data with one or more state variables +#' +#' This function maximises the likelihood of the observed data using the Port +#' algorithm \code{\link{nlminb}}, and the specified initial or fixed +#' parameters and starting values. In each step of the optimsation, the +#' kinetic model is solved using the function \code{\link{mkinpredict}}. The +#' parameters of the selected error model are fitted simultaneously with the +#' degradation model parameters, as both of them are arguments of the +#' likelihood function. +#' +#' Per default, parameters in the kinetic models are internally transformed in +#' order to better satisfy the assumption of a normal distribution of their +#' estimators. +#' +#' @param mkinmod A list of class \code{\link{mkinmod}}, containing the kinetic +#' model to be fitted to the data, or one of the shorthand names ("SFO", +#' "FOMC", "DFOP", "HS", "SFORB", "IORE"). If a shorthand name is given, a +#' parent only degradation model is generated for the variable with the +#' highest value in \code{observed}. +#' @param observed A dataframe with the observed data. The first column called +#' "name" must contain the name of the observed variable for each data point. +#' The second column must contain the times of observation, named "time". +#' The third column must be named "value" and contain the observed values. +#' Zero values in the "value" column will be removed, with a warning, in +#' order to avoid problems with fitting the two-component error model. This +#' is not expected to be a problem, because in general, values of zero are +#' not observed in degradation data, because there is a lower limit of +#' detection. +#' @param parms.ini A named vector of initial values for the parameters, +#' including parameters to be optimised and potentially also fixed parameters +#' as indicated by \code{fixed_parms}. If set to "auto", initial values for +#' rate constants are set to default values. Using parameter names that are +#' not in the model gives an error. +#' +#' It is possible to only specify a subset of the parameters that the model +#' needs. You can use the parameter lists "bparms.ode" from a previously +#' fitted model, which contains the differential equation parameters from +#' this model. This works nicely if the models are nested. An example is +#' given below. +#' @param state.ini A named vector of initial values for the state variables of +#' the model. In case the observed variables are represented by more than one +#' model variable, the names will differ from the names of the observed +#' variables (see \code{map} component of \code{\link{mkinmod}}). The default +#' is to set the initial value of the first model variable to the mean of the +#' time zero values for the variable with the maximum observed value, and all +#' others to 0. If this variable has no time zero observations, its initial +#' value is set to 100. +#' @param err.ini A named vector of initial values for the error model +#' parameters to be optimised. If set to "auto", initial values are set to +#' default values. Otherwise, inital values for all error model parameters +#' must be given. +#' @param fixed_parms The names of parameters that should not be optimised but +#' rather kept at the values specified in \code{parms.ini}. +#' @param fixed_initials The names of model variables for which the initial +#' state at time 0 should be excluded from the optimisation. Defaults to all +#' state variables except for the first one. +#' @param from_max_mean If this is set to TRUE, and the model has only one +#' observed variable, then data before the time of the maximum observed value +#' (after averaging for each sampling time) are discarded, and this time is +#' subtracted from all remaining time values, so the time of the maximum +#' observed mean value is the new time zero. +#' @param solution_type If set to "eigen", the solution of the system of +#' differential equations is based on the spectral decomposition of the +#' coefficient matrix in cases that this is possible. If set to "deSolve", a +#' numerical ode solver from package \code{\link{deSolve}} is used. If set to +#' "analytical", an analytical solution of the model is used. This is only +#' implemented for simple degradation experiments with only one state +#' variable, i.e. with no metabolites. The default is "auto", which uses +#' "analytical" if possible, otherwise "deSolve" if a compiler is present, +#' and "eigen" if no compiler is present and the model can be expressed using +#' eigenvalues and eigenvectors. This argument is passed on to the helper +#' function \code{\link{mkinpredict}}. +#' @param method.ode The solution method passed via \code{\link{mkinpredict}} +#' to \code{\link{ode}} in case the solution type is "deSolve". The default +#' "lsoda" is performant, but sometimes fails to converge. +#' @param use_compiled If set to \code{FALSE}, no compiled version of the +#' \code{\link{mkinmod}} model is used in the calls to +#' \code{\link{mkinpredict}} even if a compiled version is present. +#' @param control A list of control arguments passed to \code{\link{nlminb}}. +#' @param transform_rates Boolean specifying if kinetic rate constants should +#' be transformed in the model specification used in the fitting for better +#' compliance with the assumption of normal distribution of the estimator. If +#' TRUE, also alpha and beta parameters of the FOMC model are +#' log-transformed, as well as k1 and k2 rate constants for the DFOP and HS +#' models and the break point tb of the HS model. If FALSE, zero is used as +#' a lower bound for the rates in the optimisation. +#' @param transform_fractions Boolean specifying if formation fractions +#' constants should be transformed in the model specification used in the +#' fitting for better compliance with the assumption of normal distribution +#' of the estimator. The default (TRUE) is to do transformations. If TRUE, +#' the g parameter of the DFOP and HS models are also transformed, as they +#' can also be seen as compositional data. The transformation used for these +#' transformations is the \code{\link{ilr}} transformation. +#' @param quiet Suppress printing out the current value of the negative +#' log-likelihood after each improvement? +#' @param atol Absolute error tolerance, passed to \code{\link{ode}}. Default +#' is 1e-8, lower than in \code{\link{lsoda}}. +#' @param rtol Absolute error tolerance, passed to \code{\link{ode}}. Default +#' is 1e-10, much lower than in \code{\link{lsoda}}. +#' @param n.outtimes The length of the dataseries that is produced by the model +#' prediction function \code{\link{mkinpredict}}. This impacts the accuracy +#' of the numerical solver if that is used (see \code{solution_type} +#' argument. The default value is 100. +#' @param error_model If the error model is "const", a constant standard +#' deviation is assumed. +#' +#' If the error model is "obs", each observed variable is assumed to have its +#' own variance. +#' +#' If the error model is "tc" (two-component error model), a two component +#' error model similar to the one described by Rocke and Lorenzato (1995) is +#' used for setting up the likelihood function. Note that this model +#' deviates from the model by Rocke and Lorenzato, as their model implies +#' that the errors follow a lognormal distribution for large values, not a +#' normal distribution as assumed by this method. +#' @param error_model_algorithm If "auto", the selected algorithm depends on +#' the error model. If the error model is "const", unweighted nonlinear +#' least squares fitting ("OLS") is selected. If the error model is "obs", or +#' "tc", the "d_3" algorithm is selected. +#' +#' The algorithm "d_3" will directly minimize the negative log-likelihood and +#' - independently - also use the three step algorithm described below. The +#' fit with the higher likelihood is returned. +#' +#' The algorithm "direct" will directly minimize the negative log-likelihood. +#' +#' The algorithm "twostep" will minimize the negative log-likelihood after an +#' initial unweighted least squares optimisation step. +#' +#' The algorithm "threestep" starts with unweighted least squares, then +#' optimizes only the error model using the degradation model parameters +#' found, and then minimizes the negative log-likelihood with free +#' degradation and error model parameters. +#' +#' The algorithm "fourstep" starts with unweighted least squares, then +#' optimizes only the error model using the degradation model parameters +#' found, then optimizes the degradation model again with fixed error model +#' parameters, and finally minimizes the negative log-likelihood with free +#' degradation and error model parameters. +#' +#' The algorithm "IRLS" (Iteratively Reweighted Least Squares) starts with +#' unweighted least squares, and then iterates optimization of the error +#' model parameters and subsequent optimization of the degradation model +#' using those error model parameters, until the error model parameters +#' converge. +#' @param reweight.tol Tolerance for the convergence criterion calculated from +#' the error model parameters in IRLS fits. +#' @param reweight.max.iter Maximum number of iterations in IRLS fits. +#' @param trace_parms Should a trace of the parameter values be listed? +#' @param \dots Further arguments that will be passed on to +#' \code{\link{deSolve}}. +#' @importFrom stats nlminb aggregate dist +#' @return A list with "mkinfit" in the class attribute. A summary can be +#' obtained by \code{\link{summary.mkinfit}}. +#' @note When using the "IORE" submodel for metabolites, fitting with +#' "transform_rates = TRUE" (the default) often leads to failures of the +#' numerical ODE solver. In this situation it may help to switch off the +#' internal rate transformation. +#' @author Johannes Ranke +#' @seealso Plotting methods \code{\link{plot.mkinfit}} and +#' \code{\link{mkinparplot}}. +#' +#' Comparisons of models fitted to the same data can be made using +#' \code{\link{AIC}} by virtue of the method \code{\link{logLik.mkinfit}}. +#' +#' Fitting of several models to several datasets in a single call to +#' \code{\link{mmkin}}. +#' @source Rocke, David M. und Lorenzato, Stefan (1995) A two-component model +#' for measurement error in analytical chemistry. Technometrics 37(2), 176-184. +#' @examples +#' +#' # Use shorthand notation for parent only degradation +#' fit <- mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE) +#' summary(fit) +#' +#' # One parent compound, one metabolite, both single first order. +#' # Use mkinsub for convenience in model formulation. Pathway to sink included per default. +#' SFO_SFO <- mkinmod( +#' parent = mkinsub("SFO", "m1"), +#' m1 = mkinsub("SFO")) +#' # Fit the model to the FOCUS example dataset D using defaults +#' print(system.time(fit <- mkinfit(SFO_SFO, FOCUS_2006_D, +#' solution_type = "eigen", quiet = TRUE))) +#' coef(fit) +#' endpoints(fit) +#' \dontrun{ +#' # deSolve is slower when no C compiler (gcc) was available during model generation +#' print(system.time(fit.deSolve <- mkinfit(SFO_SFO, FOCUS_2006_D, +#' solution_type = "deSolve"))) +#' coef(fit.deSolve) +#' endpoints(fit.deSolve) +#' } +#' +#' # Use stepwise fitting, using optimised parameters from parent only fit, FOMC +#' \dontrun{ +#' FOMC_SFO <- mkinmod( +#' parent = mkinsub("FOMC", "m1"), +#' m1 = mkinsub("SFO")) +#' # Fit the model to the FOCUS example dataset D using defaults +#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, quiet = TRUE) +#' # Use starting parameters from parent only FOMC fit +#' fit.FOMC = mkinfit("FOMC", FOCUS_2006_D, quiet = TRUE) +#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, quiet = TRUE, +#' parms.ini = fit.FOMC$bparms.ode) +#' +#' # Use stepwise fitting, using optimised parameters from parent only fit, SFORB +#' SFORB_SFO <- mkinmod( +#' parent = list(type = "SFORB", to = "m1", sink = TRUE), +#' m1 = list(type = "SFO")) +#' # Fit the model to the FOCUS example dataset D using defaults +#' fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, quiet = TRUE) +#' fit.SFORB_SFO.deSolve <- mkinfit(SFORB_SFO, FOCUS_2006_D, solution_type = "deSolve", +#' quiet = TRUE) +#' # Use starting parameters from parent only SFORB fit (not really needed in this case) +#' fit.SFORB = mkinfit("SFORB", FOCUS_2006_D, quiet = TRUE) +#' fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, parms.ini = fit.SFORB$bparms.ode, quiet = TRUE) +#' } +#' +#' \dontrun{ +#' # Weighted fits, including IRLS +#' SFO_SFO.ff <- mkinmod(parent = mkinsub("SFO", "m1"), +#' m1 = mkinsub("SFO"), use_of_ff = "max") +#' f.noweight <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, quiet = TRUE) +#' summary(f.noweight) +#' f.obs <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, error_model = "obs", quiet = TRUE) +#' summary(f.obs) +#' f.tc <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, error_model = "tc", quiet = TRUE) +#' summary(f.tc) +#' } +#' +#' +#' @export +mkinfit <- function(mkinmod, observed, + parms.ini = "auto", + state.ini = "auto", + err.ini = "auto", + fixed_parms = NULL, + fixed_initials = names(mkinmod$diffs)[-1], + from_max_mean = FALSE, + solution_type = c("auto", "analytical", "eigen", "deSolve"), + method.ode = "lsoda", + use_compiled = "auto", + control = list(eval.max = 300, iter.max = 200), + transform_rates = TRUE, + transform_fractions = TRUE, + quiet = FALSE, + atol = 1e-8, rtol = 1e-10, n.outtimes = 100, + error_model = c("const", "obs", "tc"), + error_model_algorithm = c("auto", "d_3", "direct", "twostep", "threestep", "fourstep", "IRLS", "OLS"), + reweight.tol = 1e-8, reweight.max.iter = 10, + trace_parms = FALSE, + ...) +{ + # Check mkinmod and generate a model for the variable whith the highest value + # if a suitable string is given + parent_models_available = c("SFO", "FOMC", "DFOP", "HS", "SFORB", "IORE", "logistic") + if (class(mkinmod) != "mkinmod") { + presumed_parent_name = observed[which.max(observed$value), "name"] + if (mkinmod[[1]] %in% parent_models_available) { + speclist <- list(list(type = mkinmod, sink = TRUE)) + names(speclist) <- presumed_parent_name + mkinmod <- mkinmod(speclist = speclist) + } else { + stop("Argument mkinmod must be of class mkinmod or a string containing one of\n ", + paste(parent_models_available, collapse = ", ")) + } + } + + # Get the names of the state variables in the model + mod_vars <- names(mkinmod$diffs) + + # Get the names of observed variables + obs_vars <- names(mkinmod$spec) + + # Subset observed data with names of observed data in the model and remove NA values + observed <- subset(observed, name %in% obs_vars) + observed <- subset(observed, !is.na(value)) + + # Also remove zero values to avoid instabilities (e.g. of the 'tc' error model) + if (any(observed$value == 0)) { + warning("Observations with value of zero were removed from the data") + observed <- subset(observed, value != 0) + } + + # Obtain data for decline from maximum mean value if requested + if (from_max_mean) { + # This is only used for simple decline models + if (length(obs_vars) > 1) + stop("Decline from maximum is only implemented for models with a single observed variable") + + means <- aggregate(value ~ time, data = observed, mean, na.rm=TRUE) + t_of_max <- means[which.max(means$value), "time"] + observed <- subset(observed, time >= t_of_max) + observed$time <- observed$time - t_of_max + } + + # Number observations used for fitting + n_observed <- nrow(observed) + + # Define starting values for parameters where not specified by the user + if (parms.ini[[1]] == "auto") parms.ini = vector() + + # Warn for inital parameter specifications that are not in the model + wrongpar.names <- setdiff(names(parms.ini), mkinmod$parms) + if (length(wrongpar.names) > 0) { + warning("Initial parameter(s) ", paste(wrongpar.names, collapse = ", "), + " not used in the model") + parms.ini <- parms.ini[setdiff(names(parms.ini), wrongpar.names)] + } + + # Warn that the sum of formation fractions may exceed one if they are not + # fitted in the transformed way + if (mkinmod$use_of_ff == "max" & transform_fractions == FALSE) { + warning("The sum of formation fractions may exceed one if you do not use ", + "transform_fractions = TRUE." ) + for (box in mod_vars) { + # Stop if formation fractions are not transformed and we have no sink + if (mkinmod$spec[[box]]$sink == FALSE) { + stop("If formation fractions are not transformed during the fitting, ", + "it is not supported to turn off pathways to sink.\n ", + "Consider turning on the transformation of formation fractions or ", + "setting up a model with use_of_ff = 'min'.\n") + } + } + } + + # Do not allow fixing formation fractions if we are using the ilr transformation, + # this is not supported + if (transform_fractions == TRUE && length(fixed_parms) > 0) { + if (any(grepl("^f_", fixed_parms))) { + stop("Fixing formation fractions is not supported when using the ilr ", + "transformation.") + } + } + + # Set initial parameter values, including a small increment (salt) + # to avoid linear dependencies (singular matrix) in Eigenvalue based solutions + k_salt = 0 + defaultpar.names <- setdiff(mkinmod$parms, names(parms.ini)) + for (parmname in defaultpar.names) { + # Default values for rate constants, depending on the parameterisation + if (grepl("^k", parmname)) { + parms.ini[parmname] = 0.1 + k_salt + k_salt = k_salt + 1e-4 + } + # Default values for rate constants for reversible binding + if (grepl("free_bound$", parmname)) parms.ini[parmname] = 0.1 + if (grepl("bound_free$", parmname)) parms.ini[parmname] = 0.02 + # Default values for IORE exponents + if (grepl("^N", parmname)) parms.ini[parmname] = 1.1 + # Default values for the FOMC, DFOP and HS models + if (parmname == "alpha") parms.ini[parmname] = 1 + if (parmname == "beta") parms.ini[parmname] = 10 + if (parmname == "k1") parms.ini[parmname] = 0.1 + if (parmname == "k2") parms.ini[parmname] = 0.01 + if (parmname == "tb") parms.ini[parmname] = 5 + if (parmname == "g") parms.ini[parmname] = 0.5 + if (parmname == "kmax") parms.ini[parmname] = 0.1 + if (parmname == "k0") parms.ini[parmname] = 0.0001 + if (parmname == "r") parms.ini[parmname] = 0.2 + } + # Default values for formation fractions in case they are present + for (box in mod_vars) { + f_names <- mkinmod$parms[grep(paste0("^f_", box), mkinmod$parms)] + if (length(f_names) > 0) { + # We need to differentiate between default and specified fractions + # and set the unspecified to 1 - sum(specified)/n_unspecified + f_default_names <- intersect(f_names, defaultpar.names) + f_specified_names <- setdiff(f_names, defaultpar.names) + sum_f_specified = sum(parms.ini[f_specified_names]) + if (sum_f_specified > 1) { + stop("Starting values for the formation fractions originating from ", + box, " sum up to more than 1.") + } + if (mkinmod$spec[[box]]$sink) n_unspecified = length(f_default_names) + 1 + else { + n_unspecified = length(f_default_names) + } + parms.ini[f_default_names] <- (1 - sum_f_specified) / n_unspecified + } + } + + # Set default for state.ini if appropriate + parent_name = names(mkinmod$spec)[[1]] + if (state.ini[1] == "auto") { + parent_time_0 = subset(observed, time == 0 & name == parent_name)$value + parent_time_0_mean = mean(parent_time_0, na.rm = TRUE) + if (is.na(parent_time_0_mean)) { + state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)) + } else { + state.ini = c(parent_time_0_mean, rep(0, length(mkinmod$diffs) - 1)) + } + } + + # Name the inital state variable values if they are not named yet + if(is.null(names(state.ini))) names(state.ini) <- mod_vars + + # Transform initial parameter values for fitting + transparms.ini <- transform_odeparms(parms.ini, mkinmod, + transform_rates = transform_rates, + transform_fractions = transform_fractions) + + # Parameters to be optimised: + # Kinetic parameters in parms.ini whose names are not in fixed_parms + parms.fixed <- parms.ini[fixed_parms] + parms.optim <- parms.ini[setdiff(names(parms.ini), fixed_parms)] + + transparms.fixed <- transform_odeparms(parms.fixed, mkinmod, + transform_rates = transform_rates, + transform_fractions = transform_fractions) + transparms.optim <- transform_odeparms(parms.optim, mkinmod, + transform_rates = transform_rates, + transform_fractions = transform_fractions) + + # Inital state variables in state.ini whose names are not in fixed_initials + state.ini.fixed <- state.ini[fixed_initials] + state.ini.optim <- state.ini[setdiff(names(state.ini), fixed_initials)] + + # Preserve names of state variables before renaming initial state variable + # parameters + state.ini.optim.boxnames <- names(state.ini.optim) + state.ini.fixed.boxnames <- names(state.ini.fixed) + if(length(state.ini.optim) > 0) { + names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") + } + if(length(state.ini.fixed) > 0) { + names(state.ini.fixed) <- paste(names(state.ini.fixed), "0", sep="_") + } + + # Decide if the solution of the model can be based on a simple analytical + # formula, the spectral decomposition of the matrix (fundamental system) + # or a numeric ode solver from the deSolve package + # Prefer deSolve over eigen if a compiled model is present and use_compiled + # is not set to FALSE + solution_type = match.arg(solution_type) + if (solution_type == "analytical" && length(mkinmod$spec) > 1) + stop("Analytical solution not implemented for models with metabolites.") + if (solution_type == "eigen" && !is.matrix(mkinmod$coefmat)) + stop("Eigenvalue based solution not possible, coefficient matrix not present.") + if (solution_type == "auto") { + if (length(mkinmod$spec) == 1) { + solution_type = "analytical" + } else { + if (!is.null(mkinmod$cf) & use_compiled[1] != FALSE) { + solution_type = "deSolve" + } else { + if (is.matrix(mkinmod$coefmat)) { + solution_type = "eigen" + if (max(observed$value, na.rm = TRUE) < 0.1) { + stop("The combination of small observed values (all < 0.1) and solution_type = eigen is error-prone") + } + } else { + solution_type = "deSolve" + } + } + } + } + + # Get the error model and the algorithm for fitting + err_mod <- match.arg(error_model) + error_model_algorithm = match.arg(error_model_algorithm) + if (error_model_algorithm == "OLS") { + if (err_mod != "const") stop("OLS is only appropriate for constant variance") + } + if (error_model_algorithm == "auto") { + error_model_algorithm = switch(err_mod, + const = "OLS", obs = "d_3", tc = "d_3") + } + errparm_names <- switch(err_mod, + "const" = "sigma", + "obs" = paste0("sigma_", obs_vars), + "tc" = c("sigma_low", "rsd_high")) + errparm_names_optim <- if (error_model_algorithm == "OLS") NULL else errparm_names + + # Define starting values for the error model + if (err.ini[1] != "auto") { + if (!identical(names(err.ini), errparm_names)) { + stop("Please supply initial values for error model components ", paste(errparm_names, collapse = ", ")) + } else { + errparms = err.ini + } + } else { + if (err_mod == "const") { + errparms = 3 + } + if (err_mod == "obs") { + errparms = rep(3, length(obs_vars)) + } + if (err_mod == "tc") { + errparms <- c(sigma_low = 0.1, rsd_high = 0.1) + } + names(errparms) <- errparm_names + } + if (error_model_algorithm == "OLS") { + errparms_optim <- NULL + } else { + errparms_optim <- errparms + } + + # Define outtimes for model solution. + # Include time points at which observed data are available + outtimes = sort(unique(c(observed$time, seq(min(observed$time), + max(observed$time), + length.out = n.outtimes)))) + + # Define the objective function for optimisation, including (back)transformations + cost_function <- function(P, trans = TRUE, OLS = FALSE, fixed_degparms = FALSE, fixed_errparms = FALSE, update_data = TRUE, ...) + { + assign("calls", calls + 1, inherits = TRUE) # Increase the model solution counter + + # Trace parameter values if requested and if we are actually optimising + if(trace_parms & update_data) cat(P, "\n") + + # Determine local parameter values for the cost estimation + if (is.numeric(fixed_degparms)) { + cost_degparms <- fixed_degparms + cost_errparms <- P + degparms_fixed = TRUE + } else { + degparms_fixed = FALSE + } + + if (is.numeric(fixed_errparms)) { + cost_degparms <- P + cost_errparms <- fixed_errparms + errparms_fixed = TRUE + } else { + errparms_fixed = FALSE + } + + if (OLS) { + cost_degparms <- P + cost_errparms <- numeric(0) + } + + if (!OLS & !degparms_fixed & !errparms_fixed) { + cost_degparms <- P[1:(length(P) - length(errparms))] + cost_errparms <- P[(length(cost_degparms) + 1):length(P)] + } + + # Initial states for t0 + if(length(state.ini.optim) > 0) { + odeini <- c(cost_degparms[1:length(state.ini.optim)], state.ini.fixed) + names(odeini) <- c(state.ini.optim.boxnames, state.ini.fixed.boxnames) + } else { + odeini <- state.ini.fixed + names(odeini) <- state.ini.fixed.boxnames + } + + odeparms.optim <- cost_degparms[(length(state.ini.optim) + 1):length(cost_degparms)] + + if (trans == TRUE) { + odeparms <- c(odeparms.optim, transparms.fixed) + parms <- backtransform_odeparms(odeparms, mkinmod, + transform_rates = transform_rates, + transform_fractions = transform_fractions) + } else { + parms <- c(odeparms.optim, parms.fixed) + } + + # Solve the system with current parameter values + out <- mkinpredict(mkinmod, parms, + odeini, outtimes, + solution_type = solution_type, + use_compiled = use_compiled, + method.ode = method.ode, + atol = atol, rtol = rtol, ...) + + out_long <- mkin_wide_to_long(out, time = "time") + + if (err_mod == "const") { + observed$std <- if (OLS) NA else cost_errparms["sigma"] + } + if (err_mod == "obs") { + std_names <- paste0("sigma_", observed$name) + observed$std <- cost_errparms[std_names] + } + if (err_mod == "tc") { + tmp <- merge(observed, out_long, by = c("time", "name")) + tmp$name <- ordered(tmp$name, levels = obs_vars) + tmp <- tmp[order(tmp$name, tmp$time), ] + observed$std <- sqrt(cost_errparms["sigma_low"]^2 + tmp$value.y^2 * cost_errparms["rsd_high"]^2) + } + + cost_data <- merge(observed[c("name", "time", "value", "std")], out_long, + by = c("name", "time"), suffixes = c(".observed", ".predicted")) + + if (OLS) { + # Cost is the sum of squared residuals + cost <- with(cost_data, sum((value.observed - value.predicted)^2)) + } else { + # Cost is the negative log-likelihood + cost <- - with(cost_data, + sum(dnorm(x = value.observed, mean = value.predicted, sd = std, log = TRUE))) + } + + # We update the current cost and data during the optimisation, not + # during hessian calculations + if (update_data) { + + assign("out_predicted", out_long, inherits = TRUE) + assign("current_data", cost_data, inherits = TRUE) + + if (cost < cost.current) { + assign("cost.current", cost, inherits = TRUE) + if (!quiet) cat(ifelse(OLS, "Sum of squared residuals", "Negative log-likelihood"), + " at call ", calls, ": ", cost.current, "\n", sep = "") + } + } + return(cost) + } + + names_optim <- c(names(state.ini.optim), + names(transparms.optim), + errparm_names_optim) + n_optim <- length(names_optim) + + # Define lower and upper bounds other than -Inf and Inf for parameters + # for which no internal transformation is requested in the call to mkinfit + # and for optimised error model parameters + lower <- rep(-Inf, n_optim) + upper <- rep(Inf, n_optim) + names(lower) <- names(upper) <- names_optim + + # IORE exponents are not transformed, but need a lower bound + index_N <- grep("^N", names(lower)) + lower[index_N] <- 0 + + if (!transform_rates) { + index_k <- grep("^k_", names(lower)) + lower[index_k] <- 0 + index_k__iore <- grep("^k__iore_", names(lower)) + lower[index_k__iore] <- 0 + other_rate_parms <- intersect(c("alpha", "beta", "k1", "k2", "tb", "r"), names(lower)) + lower[other_rate_parms] <- 0 + } + + if (!transform_fractions) { + index_f <- grep("^f_", names(upper)) + lower[index_f] <- 0 + upper[index_f] <- 1 + other_fraction_parms <- intersect(c("g"), names(upper)) + lower[other_fraction_parms] <- 0 + upper[other_fraction_parms] <- 1 + } + + if (err_mod == "const") { + if (error_model_algorithm != "OLS") { + lower["sigma"] <- 0 + } + } + if (err_mod == "obs") { + index_sigma <- grep("^sigma_", names(lower)) + lower[index_sigma] <- 0 + } + if (err_mod == "tc") { + lower["sigma_low"] <- 0 + lower["rsd_high"] <- 0 + } + + # Counter for cost function evaluations + calls = 0 + cost.current <- Inf + out_predicted <- NA + current_data <- NA + + # Show parameter names if tracing is requested + if(trace_parms) cat(names_optim, "\n") + + # browser() + + # Do the fit and take the time until the hessians are calculated + fit_time <- system.time({ + degparms <- c(state.ini.optim, transparms.optim) + n_degparms <- length(degparms) + degparms_index <- seq(1, n_degparms) + errparms_index <- seq(n_degparms + 1, length.out = length(errparms)) + + if (error_model_algorithm == "d_3") { + if (!quiet) message("Directly optimising the complete model") + parms.start <- c(degparms, errparms) + fit_direct <- nlminb(parms.start, cost_function, + lower = lower[names(parms.start)], + upper = upper[names(parms.start)], + control = control, ...) + fit_direct$logLik <- - cost.current + if (error_model_algorithm == "direct") { + degparms <- fit_direct$par[degparms_index] + errparms <- fit_direct$par[errparms_index] + } else { + cost.current <- Inf # reset to avoid conflict with the OLS step + } + } + if (error_model_algorithm != "direct") { + if (!quiet) message("Ordinary least squares optimisation") + fit <- nlminb(degparms, cost_function, control = control, + lower = lower[names(degparms)], + upper = upper[names(degparms)], OLS = TRUE, ...) + degparms <- fit$par + + # Get the maximum likelihood estimate for sigma at the optimum parameter values + current_data$residual <- current_data$value.observed - current_data$value.predicted + sigma_mle <- sqrt(sum(current_data$residual^2)/nrow(current_data)) + + # Use that estimate for the constant variance, or as first guess if err_mod = "obs" + if (err_mod != "tc") { + errparms[names(errparms)] <- sigma_mle + } + fit$par <- c(fit$par, errparms) + + cost.current <- cost_function(c(degparms, errparms), OLS = FALSE) + fit$logLik <- - cost.current + } + if (error_model_algorithm %in% c("threestep", "fourstep", "d_3")) { + if (!quiet) message("Optimising the error model") + fit <- nlminb(errparms, cost_function, control = control, + lower = lower[names(errparms)], + upper = upper[names(errparms)], + fixed_degparms = degparms, ...) + errparms <- fit$par + } + if (error_model_algorithm == "fourstep") { + if (!quiet) message("Optimising the degradation model") + fit <- nlminb(degparms, cost_function, control = control, + lower = lower[names(degparms)], + upper = upper[names(degparms)], + fixed_errparms = errparms, ...) + degparms <- fit$par + } + if (error_model_algorithm %in% + c("direct", "twostep", "threestep", "fourstep", "d_3")) { + if (!quiet) message("Optimising the complete model") + parms.start <- c(degparms, errparms) + fit <- nlminb(parms.start, cost_function, + lower = lower[names(parms.start)], + upper = upper[names(parms.start)], + control = control, ...) + degparms <- fit$par[degparms_index] + errparms <- fit$par[errparms_index] + fit$logLik <- - cost.current + + if (error_model_algorithm == "d_3") { + d_3_messages = c( + same = "Direct fitting and three-step fitting yield approximately the same likelihood", + threestep = "Three-step fitting yielded a higher likelihood than direct fitting", + direct = "Direct fitting yielded a higher likelihood than three-step fitting") + rel_diff <- abs((fit_direct$logLik - fit$logLik))/-mean(c(fit_direct$logLik, fit$logLik)) + if (rel_diff < 0.0001) { + if (!quiet) message(d_3_messages["same"]) + fit$d_3_message <- d_3_messages["same"] + } else { + if (fit$logLik > fit_direct$logLik) { + if (!quiet) message(d_3_messages["threestep"]) + fit$d_3_message <- d_3_messages["threestep"] + } else { + if (!quiet) message(d_3_messages["direct"]) + fit <- fit_direct + fit$d_3_message <- d_3_messages["direct"] + } + } + } + } + if (err_mod != "const" & error_model_algorithm == "IRLS") { + reweight.diff <- 1 + n.iter <- 0 + errparms_last <- errparms + + while (reweight.diff > reweight.tol & + n.iter < reweight.max.iter) { + + if (!quiet) message("Optimising the error model") + fit <- nlminb(errparms, cost_function, control = control, + lower = lower[names(errparms)], + upper = upper[names(errparms)], + fixed_degparms = degparms, ...) + errparms <- fit$par + + if (!quiet) message("Optimising the degradation model") + fit <- nlminb(degparms, cost_function, control = control, + lower = lower[names(degparms)], + upper = upper[names(degparms)], + fixed_errparms = errparms, ...) + degparms <- fit$par + + reweight.diff <- dist(rbind(errparms, errparms_last)) + errparms_last <- errparms + + fit$par <- c(fit$par, errparms) + cost.current <- cost_function(c(degparms, errparms), OLS = FALSE) + fit$logLik <- - cost.current + } + } + + fit$hessian <- try(numDeriv::hessian(cost_function, c(degparms, errparms), OLS = FALSE, + update_data = FALSE), silent = TRUE) + + # Backtransform parameters + bparms.optim = backtransform_odeparms(fit$par, mkinmod, + transform_rates = transform_rates, + transform_fractions = transform_fractions) + bparms.fixed = c(state.ini.fixed, parms.fixed) + bparms.all = c(bparms.optim, parms.fixed) + + fit$hessian_notrans <- try(numDeriv::hessian(cost_function, c(bparms.all, errparms), + OLS = FALSE, trans = FALSE, update_data = FALSE), silent = TRUE) + }) + + fit$error_model_algorithm <- error_model_algorithm + + if (fit$convergence != 0) { + fit$warning = paste0("Optimisation did not converge:\n", fit$message) + warning(fit$warning) + } else { + if(!quiet) message("Optimisation successfully terminated.\n") + } + + # We need to return some more data for summary and plotting + fit$solution_type <- solution_type + fit$transform_rates <- transform_rates + fit$transform_fractions <- transform_fractions + fit$reweight.tol <- reweight.tol + fit$reweight.max.iter <- reweight.max.iter + fit$control <- control + fit$calls <- calls + fit$time <- fit_time + + # We also need the model for summary and plotting + fit$mkinmod <- mkinmod + + # We need data and predictions for summary and plotting + fit$observed <- observed + fit$obs_vars <- obs_vars + fit$predicted <- out_predicted + + # Residual sum of squares as a function of the fitted parameters + fit$rss <- function(P) cost_function(P, OLS = TRUE, update_data = FALSE) + + # Log-likelihood with possibility to fix degparms or errparms + fit$ll <- function(P, fixed_degparms = FALSE, fixed_errparms = FALSE) { + - cost_function(P, fixed_degparms = fixed_degparms, + fixed_errparms = fixed_errparms, OLS = FALSE, update_data = FALSE) + } + + # Collect initial parameter values in three dataframes + fit$start <- data.frame(value = c(state.ini.optim, + parms.optim, errparms_optim)) + fit$start$type = c(rep("state", length(state.ini.optim)), + rep("deparm", length(parms.optim)), + rep("error", length(errparms_optim))) + + fit$start_transformed = data.frame( + value = c(state.ini.optim, transparms.optim, errparms_optim), + lower = lower, + upper = upper) + + fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) + fit$fixed$type = c(rep("state", length(state.ini.fixed)), + rep("deparm", length(parms.fixed))) + + # Sort observed, predicted and residuals + current_data$name <- ordered(current_data$name, levels = obs_vars) + + ordered_data <- current_data[order(current_data$name, current_data$time), ] + + fit$data <- data.frame(time = ordered_data$time, + variable = ordered_data$name, + observed = ordered_data$value.observed, + predicted = ordered_data$value.predicted) + + fit$data$residual <- fit$data$observed - fit$data$predicted + + fit$atol <- atol + fit$rtol <- rtol + fit$err_mod <- err_mod + + # Return different sets of backtransformed parameters for summary and plotting + fit$bparms.optim <- bparms.optim + fit$bparms.fixed <- bparms.fixed + + # Return ode and state parameters for further fitting + fit$bparms.ode <- bparms.all[mkinmod$parms] + fit$bparms.state <- c(bparms.all[setdiff(names(bparms.all), names(fit$bparms.ode))], + state.ini.fixed) + names(fit$bparms.state) <- gsub("_0$", "", names(fit$bparms.state)) + + fit$errparms <- errparms + fit$df.residual <- n_observed - length(c(degparms, errparms)) + + fit$date <- date() + fit$version <- as.character(utils::packageVersion("mkin")) + fit$Rversion <- paste(R.version$major, R.version$minor, sep=".") + + class(fit) <- c("mkinfit", "modFit") + return(fit) +} |