From 0a3eb0893cb4bd1b12f07a79069d1c7f5e991495 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Fri, 25 Oct 2019 00:37:42 +0200 Subject: Use roxygen for functions and methods --- R/AIC.mmkin.R | 50 +- R/CAKE_export.R | 44 +- R/DFOP.solution.R | 32 +- R/FOMC.solution.R | 40 +- R/HS.solution.R | 35 +- R/IORE.solution.R | 41 +- R/SFO.solution.R | 26 +- R/SFORB.solution.R | 44 +- R/add_err.R | 96 ++- R/endpoints.R | 377 +++++----- R/ilr.R | 46 +- R/logLik.mkinfit.R | 52 +- R/logistic.solution.R | 55 ++ R/max_twa_parent.R | 70 +- R/mkin_long_to_wide.R | 57 +- R/mkin_wide_to_long.R | 68 +- R/mkinds.R | 55 +- R/mkinerrmin.R | 55 +- R/mkinerrplot.R | 56 +- R/mkinfit.R | 1801 ++++++++++++++++++++++++------------------------ R/mkinmod.R | 866 +++++++++++++---------- R/mkinparplot.R | 37 +- R/mkinplot.R | 4 - R/mkinpredict.R | 119 +++- R/mkinresplot.R | 149 ++-- R/mkinsub.R | 54 +- R/mmkin.R | 113 ++- R/nafta.R | 79 ++- R/plot.mkinfit.R | 126 +++- R/plot.mmkin.R | 68 +- R/sigma_twocomp.R | 26 + R/summary.mkinfit.R | 272 ++++++++ R/transform_odeparms.R | 438 +++++++----- 33 files changed, 3364 insertions(+), 2087 deletions(-) delete mode 100644 R/mkinplot.R create mode 100644 R/summary.mkinfit.R (limited to 'R') diff --git a/R/AIC.mmkin.R b/R/AIC.mmkin.R index 1d306ff9..ab17f0a0 100644 --- a/R/AIC.mmkin.R +++ b/R/AIC.mmkin.R @@ -1,21 +1,35 @@ -# Copyright (C) 2018 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see -AIC.mmkin <- function(object, ..., k = 2) { +#' Calculated the AIC for a column of an mmkin object +#' +#' Provides a convenient way to compare different kinetic models fitted to the +#' same dataset. +#' +#' @param object An object of class \code{\link{mmkin}}, containing only one +#' column. +#' @param \dots For compatibility with the generic method +#' @param k As in the generic method +#' @return As in the generic method (a numeric value for single fits, or a +#' dataframe if there are several fits in the column). +#' @author Johannes Ranke +#' @examples +#' +#' \dontrun{ # skip, as it takes > 10 s on winbuilder +#' f <- mmkin(c("SFO", "FOMC", "DFOP"), +#' list("FOCUS A" = FOCUS_2006_A, +#' "FOCUS C" = FOCUS_2006_C), cores = 1, quiet = TRUE) +#' AIC(f[1, "FOCUS A"]) # We get a single number for a single fit +#' +#' # For FOCUS A, the models fit almost equally well, so the higher the number +#' # of parameters, the higher (worse) the AIC +#' AIC(f[, "FOCUS A"]) +#' AIC(f[, "FOCUS A"], k = 0) # If we do not penalize additional parameters, we get nearly the same +#' +#' # For FOCUS C, the more complex models fit better +#' AIC(f[, "FOCUS C"]) +#' } +#' +#' @export +AIC.mmkin <- function(object, ..., k = 2) +{ # We can only handle a single column if (ncol(object) != 1) stop("Please provide a single column object") n.fits <- length(object) diff --git a/R/CAKE_export.R b/R/CAKE_export.R index db9caa8d..70661b10 100644 --- a/R/CAKE_export.R +++ b/R/CAKE_export.R @@ -1,20 +1,30 @@ -# Copyright (C) 2019 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see +#' Export a list of datasets format to a CAKE study file +#' +#' In addition to the datasets, the pathways in the degradation model can be +#' specified as well. +#' +#' @param ds A named list of datasets in long format as compatible with +#' \code{\link{mkinfit}}. +#' @param map A character vector with CAKE compartment names (Parent, A1, ...), +#' named with the names used in the list of datasets. +#' @param links An optional character vector of target compartments, named with +#' the names of the source compartments. In order to make this easier, the +#' names are used as in the datasets supplied. +#' @param filename Where to write the result. Should end in .csf in order to be +#' compatible with CAKE. +#' @param path An optional path to the output file. +#' @param overwrite If TRUE, existing files are overwritten. +#' @param study The name of the study. +#' @param description An optional description. +#' @param time_unit The time unit for the residue data. +#' @param res_unit The unit used for the residues. +#' @param comment An optional comment. +#' @param date The date of file creation. +#' @param optimiser Can be OLS or IRLS. +#' @importFrom utils write.table +#' @return The function is called for its side effect. +#' @author Johannes Ranke +#' @export CAKE_export <- function(ds, map = c(parent = "Parent"), links = NA, filename = "CAKE_export.csf", path = ".", overwrite = FALSE, diff --git a/R/DFOP.solution.R b/R/DFOP.solution.R index 8531cfed..608e7e18 100644 --- a/R/DFOP.solution.R +++ b/R/DFOP.solution.R @@ -1,5 +1,27 @@ -DFOP.solution <- function(t, parent.0, k1, k2, g) -{ - parent = g * parent.0 * exp(-k1 * t) + - (1 - g) * parent.0 * exp(-k2 * t) -} +#' Double First-Order in Parallel kinetics +#' +#' Function describing decline from a defined starting value using the sum of +#' two exponential decline functions. +#' +#' @param t Time. +#' @param parent.0 Starting value for the response variable at time zero. +#' @param k1 First kinetic constant. +#' @param k2 Second kinetic constant. +#' @param g Fraction of the starting value declining according to the first +#' kinetic constant. +#' @return The value of the response variable at time \code{t}. +#' @references FOCUS (2006) \dQuote{Guidance Document on Estimating Persistence +#' and Degradation Kinetics from Environmental Fate Studies on Pesticides in +#' EU Registration} Report of the FOCUS Work Group on Degradation Kinetics, +#' EC Document Reference Sanco/10058/2005 version 2.0, 434 pp, +#' \url{http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics} +#' @examples +#' +#' plot(function(x) DFOP.solution(x, 100, 5, 0.5, 0.3), 0, 4, ylim = c(0,100)) +#' +#' @export +DFOP.solution <- function(t, parent.0, k1, k2, g) +{ + parent = g * parent.0 * exp(-k1 * t) + + (1 - g) * parent.0 * exp(-k2 * t) +} diff --git a/R/FOMC.solution.R b/R/FOMC.solution.R index 8bd13d6b..f5e6a7ea 100644 --- a/R/FOMC.solution.R +++ b/R/FOMC.solution.R @@ -1,4 +1,36 @@ -FOMC.solution <- function(t, parent.0, alpha, beta) -{ - parent = parent.0 / (t/beta + 1)^alpha -} +#' First-Order Multi-Compartment kinetics +#' +#' Function describing exponential decline from a defined starting value, with +#' a decreasing rate constant. +#' +#' The form given here differs slightly from the original reference by +#' Gustafson and Holden (1990). The parameter \code{beta} corresponds to 1/beta +#' in the original equation. +#' +#' @param t Time. +#' @param parent.0 Starting value for the response variable at time zero. +#' @param alpha Shape parameter determined by coefficient of variation of rate +#' constant values. +#' @param beta Location parameter. +#' @return The value of the response variable at time \code{t}. +#' @note The solution of the FOMC kinetic model reduces to the +#' \code{\link{SFO.solution}} for large values of \code{alpha} and +#' \code{beta} with \eqn{k = \frac{\beta}{\alpha}}{k = beta/alpha}. +#' @references FOCUS (2006) \dQuote{Guidance Document on Estimating Persistence +#' and Degradation Kinetics from Environmental Fate Studies on Pesticides in +#' EU Registration} Report of the FOCUS Work Group on Degradation Kinetics, +#' EC Document Reference Sanco/10058/2005 version 2.0, 434 pp, +#' \url{http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics} +#' +#' Gustafson DI and Holden LR (1990) Nonlinear pesticide dissipation in soil: +#' A new model based on spatial variability. \emph{Environmental Science and +#' Technology} \bold{24}, 1032-1038 +#' @examples +#' +#' plot(function(x) FOMC.solution(x, 100, 10, 2), 0, 2, ylim = c(0, 100)) +#' +#' @export +FOMC.solution <- function(t, parent.0, alpha, beta) +{ + parent = parent.0 / (t/beta + 1)^alpha +} diff --git a/R/HS.solution.R b/R/HS.solution.R index 4651a6a8..890ad8ff 100644 --- a/R/HS.solution.R +++ b/R/HS.solution.R @@ -1,6 +1,29 @@ -HS.solution <- function(t, parent.0, k1, k2, tb) -{ - parent = ifelse(t <= tb, - parent.0 * exp(-k1 * t), - parent.0 * exp(-k1 * tb) * exp(-k2 * (t - tb))) -} +#' Hockey-Stick kinetics +#' +#' Function describing two exponential decline functions with a break point +#' between them. +#' +#' @param t Time. +#' @param parent.0 Starting value for the response variable at time zero. +#' @param k1 First kinetic constant. +#' @param k2 Second kinetic constant. +#' @param tb Break point. Before this time, exponential decline according to +#' \code{k1} is calculated, after this time, exponential decline proceeds +#' according to \code{k2}. +#' @return The value of the response variable at time \code{t}. +#' @references FOCUS (2006) \dQuote{Guidance Document on Estimating Persistence +#' and Degradation Kinetics from Environmental Fate Studies on Pesticides in +#' EU Registration} Report of the FOCUS Work Group on Degradation Kinetics, +#' EC Document Reference Sanco/10058/2005 version 2.0, 434 pp, +#' \url{http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics} +#' @examples +#' +#' plot(function(x) HS.solution(x, 100, 2, 0.3, 0.5), 0, 2, ylim=c(0,100)) +#' +#' @export +HS.solution <- function(t, parent.0, k1, k2, tb) +{ + parent = ifelse(t <= tb, + parent.0 * exp(-k1 * t), + parent.0 * exp(-k1 * tb) * exp(-k2 * (t - tb))) +} diff --git a/R/IORE.solution.R b/R/IORE.solution.R index 5405be96..58807108 100644 --- a/R/IORE.solution.R +++ b/R/IORE.solution.R @@ -1,4 +1,37 @@ -IORE.solution <- function(t, parent.0, k__iore, N) -{ - parent = (parent.0^(1 - N) - (1 - N) * k__iore * t)^(1/(1 - N)) -} +#' Indeterminate order rate equation kinetics +#' +#' Function describing exponential decline from a defined starting value, with +#' a concentration dependent rate constant. +#' +#' @param t Time. +#' @param parent.0 Starting value for the response variable at time zero. +#' @param k__iore Rate constant. Note that this depends on the concentration +#' units used. +#' @param N Exponent describing the nonlinearity of the rate equation +#' @return The value of the response variable at time \code{t}. +#' @note The solution of the IORE kinetic model reduces to the +#' \code{\link{SFO.solution}} if N = 1. The parameters of the IORE model can +#' be transformed to equivalent parameters of the FOMC mode - see the NAFTA +#' guidance for details. +#' @references NAFTA Technical Working Group on Pesticides (not dated) Guidance +#' for Evaluating and Calculating Degradation Kinetics in Environmental Media +#' @keywords manip +#' @examples +#' +#' plot(function(x) IORE.solution(x, 100, 0.2, 1.3), 0, 2, ylim = c(0, 100)) +#' \dontrun{ +#' fit.fomc <- mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE) +#' fit.iore <- mkinfit("IORE", FOCUS_2006_C, quiet = TRUE) +#' fit.iore.deS <- mkinfit("IORE", FOCUS_2006_C, solution_type = "deSolve", quiet = TRUE) +#' +#' print(data.frame(fit.fomc$par, fit.iore$par, fit.iore.deS$par, +#' row.names = paste("model par", 1:4))) +#' print(rbind(fomc = endpoints(fit.fomc)$distimes, iore = endpoints(fit.iore)$distimes, +#' iore.deS = endpoints(fit.iore)$distimes)) +#' } +#' +#' @export +IORE.solution <- function(t, parent.0, k__iore, N) +{ + parent = (parent.0^(1 - N) - (1 - N) * k__iore * t)^(1/(1 - N)) +} diff --git a/R/SFO.solution.R b/R/SFO.solution.R index 3a376e48..17c16a4d 100644 --- a/R/SFO.solution.R +++ b/R/SFO.solution.R @@ -1,4 +1,22 @@ -SFO.solution <- function(t, parent.0, k) -{ - parent = parent.0 * exp(-k * t) -} +#' Single First-Order kinetics +#' +#' Function describing exponential decline from a defined starting value. +#' +#' @param t Time. +#' @param parent.0 Starting value for the response variable at time zero. +#' @param k Kinetic constant. +#' @return The value of the response variable at time \code{t}. +#' @references FOCUS (2006) \dQuote{Guidance Document on Estimating Persistence +#' and Degradation Kinetics from Environmental Fate Studies on Pesticides in +#' EU Registration} Report of the FOCUS Work Group on Degradation Kinetics, +#' EC Document Reference Sanco/10058/2005 version 2.0, 434 pp, +#' \url{http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics} +#' @examples +#' +#' \dontrun{plot(function(x) SFO.solution(x, 100, 3), 0, 2)} +#' +#' @export +SFO.solution <- function(t, parent.0, k) +{ + parent = parent.0 * exp(-k * t) +} diff --git a/R/SFORB.solution.R b/R/SFORB.solution.R index 4cb94def..2abe4577 100644 --- a/R/SFORB.solution.R +++ b/R/SFORB.solution.R @@ -1,9 +1,35 @@ -SFORB.solution = function(t, parent.0, k_12, k_21, k_1output) { - sqrt_exp = sqrt(1/4 * (k_12 + k_21 + k_1output)^2 + k_12 * k_21 - (k_12 + k_1output) * k_21) - b1 = 0.5 * (k_12 + k_21 + k_1output) + sqrt_exp - b2 = 0.5 * (k_12 + k_21 + k_1output) - sqrt_exp - - parent = parent.0 * - (((k_12 + k_21 - b1)/(b2 - b1)) * exp(-b1 * t) + - ((k_12 + k_21 - b2)/(b1 - b2)) * exp(-b2 * t)) -} +#' Single First-Order Reversible Binding kinetics +#' +#' Function describing the solution of the differential equations describing +#' the kinetic model with first-order terms for a two-way transfer from a free +#' to a bound fraction, and a first-order degradation term for the free +#' fraction. The initial condition is a defined amount in the free fraction +#' and no substance in the bound fraction. +#' +#' @param t Time. +#' @param parent.0 Starting value for the response variable at time zero. +#' @param k_12 Kinetic constant describing transfer from free to bound. +#' @param k_21 Kinetic constant describing transfer from bound to free. +#' @param k_1output Kinetic constant describing degradation of the free +#' fraction. +#' @return The value of the response variable, which is the sum of free and +#' bound fractions at time \code{t}. +#' @references FOCUS (2006) \dQuote{Guidance Document on Estimating Persistence +#' and Degradation Kinetics from Environmental Fate Studies on Pesticides in +#' EU Registration} Report of the FOCUS Work Group on Degradation Kinetics, +#' EC Document Reference Sanco/10058/2005 version 2.0, 434 pp, +#' \url{http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics} +#' @examples +#' +#' \dontrun{plot(function(x) SFORB.solution(x, 100, 0.5, 2, 3), 0, 2)} +#' +#' @export +SFORB.solution = function(t, parent.0, k_12, k_21, k_1output) { + sqrt_exp = sqrt(1/4 * (k_12 + k_21 + k_1output)^2 + k_12 * k_21 - (k_12 + k_1output) * k_21) + b1 = 0.5 * (k_12 + k_21 + k_1output) + sqrt_exp + b2 = 0.5 * (k_12 + k_21 + k_1output) - sqrt_exp + + parent = parent.0 * + (((k_12 + k_21 - b1)/(b2 - b1)) * exp(-b1 * t) + + ((k_12 + k_21 - b2)/(b1 - b2)) * exp(-b2 * t)) +} diff --git a/R/add_err.R b/R/add_err.R index b2a1808e..a523e9c2 100644 --- a/R/add_err.R +++ b/R/add_err.R @@ -1,24 +1,78 @@ -# Copyright (C) 2015-2018 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see - -add_err = function(prediction, sdfunc, secondary = c("M1", "M2"), - n = 1000, LOD = 0.1, reps = 2, - digits = 1, seed = NA) +#' Add normally distributed errors to simulated kinetic degradation data +#' +#' Normally distributed errors are added to data predicted for a specific +#' degradation model using \code{\link{mkinpredict}}. The variance of the error +#' may depend on the predicted value and is specified as a standard deviation. +#' +#' @param prediction A prediction from a kinetic model as produced by +#' \code{\link{mkinpredict}}. +#' @param sdfunc A function taking the predicted value as its only argument and +#' returning a standard deviation that should be used for generating the +#' random error terms for this value. +#' @param secondary The names of state variables that should have an initial +#' value of zero +#' @param n The number of datasets to be generated. +#' @param LOD The limit of detection (LOD). Values that are below the LOD after +#' adding the random error will be set to NA. +#' @param reps The number of replicates to be generated within the datasets. +#' @param digits The number of digits to which the values will be rounded. +#' @param seed The seed used for the generation of random numbers. If NA, the +#' seed is not set. +#' @importFrom stats rnorm +#' @return A list of datasets compatible with \code{\link{mmkin}}, i.e. the +#' components of the list are datasets compatible with \code{\link{mkinfit}}. +#' @author Johannes Ranke +#' @references Ranke J and Lehmann R (2015) To t-test or not to t-test, that is +#' the question. XV Symposium on Pesticide Chemistry 2-4 September 2015, +#' Piacenza, Italy +#' http://chem.uft.uni-bremen.de/ranke/posters/piacenza_2015.pdf +#' @examples +#' +#' # The kinetic model +#' m_SFO_SFO <- mkinmod(parent = mkinsub("SFO", "M1"), +#' M1 = mkinsub("SFO"), use_of_ff = "max") +#' +#' # Generate a prediction for a specific set of parameters +#' sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) +#' +#' # This is the prediction used for the "Type 2 datasets" on the Piacenza poster +#' # from 2015 +#' d_SFO_SFO <- mkinpredict(m_SFO_SFO, +#' c(k_parent = 0.1, f_parent_to_M1 = 0.5, +#' k_M1 = log(2)/1000), +#' c(parent = 100, M1 = 0), +#' sampling_times) +#' +#' # Add an error term with a constant (independent of the value) standard deviation +#' # of 10, and generate three datasets +#' d_SFO_SFO_err <- add_err(d_SFO_SFO, function(x) 10, n = 3, seed = 123456789 ) +#' +#' # Name the datasets for nicer plotting +#' names(d_SFO_SFO_err) <- paste("Dataset", 1:3) +#' +#' # Name the model in the list of models (with only one member in this case) for +#' # nicer plotting later on. Be quiet and use only one core not to offend CRAN +#' # checks +#' \dontrun{ +#' f_SFO_SFO <- mmkin(list("SFO-SFO" = m_SFO_SFO), +#' d_SFO_SFO_err, cores = 1, +#' quiet = TRUE) +#' +#' plot(f_SFO_SFO) +#' +#' # We would like to inspect the fit for dataset 3 more closely +#' # Using double brackets makes the returned object an mkinfit object +#' # instead of a list of mkinfit objects, so plot.mkinfit is used +#' plot(f_SFO_SFO[[3]], show_residuals = TRUE) +#' +#' # If we use single brackets, we should give two indices (model and dataset), +#' # and plot.mmkin is used +#' plot(f_SFO_SFO[1, 3]) +#' } +#' +#' @export +add_err <- function(prediction, sdfunc, secondary = c("M1", "M2"), + n = 1000, LOD = 0.1, reps = 2, digits = 1, seed = NA) { if (!is.na(seed)) set.seed(seed) diff --git a/R/endpoints.R b/R/endpoints.R index f8a44c4d..14beadea 100644 --- a/R/endpoints.R +++ b/R/endpoints.R @@ -1,178 +1,199 @@ -endpoints <- function(fit) { - # Calculate dissipation times DT50 and DT90 and formation - # fractions as well as SFORB eigenvalues from optimised parameters - # Additional DT50 values are calculated from the FOMC DT90 and k1 and k2 from - # HS and DFOP, as well as from Eigenvalues b1 and b2 of any SFORB models - ep <- list() - obs_vars <- fit$obs_vars - parms.all <- c(fit$bparms.optim, fit$bparms.fixed) - ep$ff <- vector() - ep$SFORB <- vector() - ep$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), - DT90 = rep(NA, length(obs_vars)), - row.names = obs_vars) - for (obs_var in obs_vars) { - type = names(fit$mkinmod$map[[obs_var]])[1] - - # Get formation fractions if directly fitted, and calculate remaining fraction to sink - f_names = grep(paste("^f", obs_var, sep = "_"), names(parms.all), value=TRUE) - if (length(f_names) > 0) { - f_values = parms.all[f_names] - f_to_sink = 1 - sum(f_values) - names(f_to_sink) = ifelse(type == "SFORB", - paste(obs_var, "free", "sink", sep = "_"), - paste(obs_var, "sink", sep = "_")) - for (f_name in f_names) { - ep$ff[[sub("f_", "", sub("_to_", "_", f_name))]] = f_values[[f_name]] - } - ep$ff = append(ep$ff, f_to_sink) - } - - # Get the rest - if (type == "SFO") { - k_names = grep(paste("^k", obs_var, sep="_"), names(parms.all), value=TRUE) - k_tot = sum(parms.all[k_names]) - DT50 = log(2)/k_tot - DT90 = log(10)/k_tot - if (fit$mkinmod$use_of_ff == "min") { - for (k_name in k_names) - { - ep$ff[[sub("k_", "", k_name)]] = parms.all[[k_name]] / k_tot - } - } - } - if (type == "FOMC") { - alpha = parms.all["alpha"] - beta = parms.all["beta"] - DT50 = beta * (2^(1/alpha) - 1) - DT90 = beta * (10^(1/alpha) - 1) - DT50_back = DT90 / (log(10)/log(2)) # Backcalculated DT50 as recommended in FOCUS 2011 - ep$distimes[obs_var, c("DT50back")] = DT50_back - } - if (type == "IORE") { - k_names = grep(paste("^k__iore", obs_var, sep="_"), names(parms.all), value=TRUE) - k_tot = sum(parms.all[k_names]) - # From the NAFTA kinetics guidance, p. 5 - n = parms.all[paste("N", obs_var, sep = "_")] - k = k_tot - # Use the initial concentration of the parent compound - source_name = fit$mkinmod$map[[1]][[1]] - c0 = parms.all[paste(source_name, "0", sep = "_")] - alpha = 1 / (n - 1) - beta = (c0^(1 - n))/(k * (n - 1)) - DT50 = beta * (2^(1/alpha) - 1) - DT90 = beta * (10^(1/alpha) - 1) - DT50_back = DT90 / (log(10)/log(2)) # Backcalculated DT50 as recommended in FOCUS 2011 - ep$distimes[obs_var, c("DT50back")] = DT50_back - if (fit$mkinmod$use_of_ff == "min") { - for (k_name in k_names) - { - ep$ff[[sub("k_", "", k_name)]] = parms.all[[k_name]] / k_tot - } - } - } - if (type == "DFOP") { - k1 = parms.all["k1"] - k2 = parms.all["k2"] - g = parms.all["g"] - f <- function(log_t, x) { - t <- exp(log_t) - fraction <- g * exp( - k1 * t) + (1 - g) * exp( - k2 * t) - (fraction - (1 - x/100))^2 - } - DT50_k1 = log(2)/k1 - DT50_k2 = log(2)/k2 - DT90_k1 = log(10)/k1 - DT90_k2 = log(10)/k2 - - DT50 <- try(exp(optimize(f, c(log(DT50_k1), log(DT50_k2)), x=50)$minimum), - silent = TRUE) - DT90 <- try(exp(optimize(f, c(log(DT90_k1), log(DT90_k2)), x=90)$minimum), - silent = TRUE) - if (inherits(DT50, "try-error")) DT50 = NA - if (inherits(DT90, "try-error")) DT90 = NA - - ep$distimes[obs_var, c("DT50_k1")] = DT50_k1 - ep$distimes[obs_var, c("DT50_k2")] = DT50_k2 - } - if (type == "HS") { - k1 = parms.all["k1"] - k2 = parms.all["k2"] - tb = parms.all["tb"] - DTx <- function(x) { - DTx.a <- (log(100/(100 - x)))/k1 - DTx.b <- tb + (log(100/(100 - x)) - k1 * tb)/k2 - if (DTx.a < tb) DTx <- DTx.a - else DTx <- DTx.b - return(DTx) - } - DT50 <- DTx(50) - DT90 <- DTx(90) - DT50_k1 = log(2)/k1 - DT50_k2 = log(2)/k2 - ep$distimes[obs_var, c("DT50_k1")] = DT50_k1 - ep$distimes[obs_var, c("DT50_k2")] = DT50_k2 - } - if (type == "SFORB") { - # FOCUS kinetics (2006), p. 60 f - k_out_names = grep(paste("^k", obs_var, "free", sep="_"), names(parms.all), value=TRUE) - k_out_names = setdiff(k_out_names, paste("k", obs_var, "free", "bound", sep="_")) - k_1output = sum(parms.all[k_out_names]) - k_12 = parms.all[paste("k", obs_var, "free", "bound", sep="_")] - k_21 = parms.all[paste("k", obs_var, "bound", "free", sep="_")] - - sqrt_exp = sqrt(1/4 * (k_12 + k_21 + k_1output)^2 + k_12 * k_21 - (k_12 + k_1output) * k_21) - b1 = 0.5 * (k_12 + k_21 + k_1output) + sqrt_exp - b2 = 0.5 * (k_12 + k_21 + k_1output) - sqrt_exp - - DT50_b1 = log(2)/b1 - DT50_b2 = log(2)/b2 - DT90_b1 = log(10)/b1 - DT90_b2 = log(10)/b2 - - SFORB_fraction = function(t) { - ((k_12 + k_21 - b1)/(b2 - b1)) * exp(-b1 * t) + - ((k_12 + k_21 - b2)/(b1 - b2)) * exp(-b2 * t) - } - - f_50 <- function(log_t) (SFORB_fraction(exp(log_t)) - 0.5)^2 - log_DT50 <- try(optimize(f_50, c(log(DT50_b1), log(DT50_b2)))$minimum, - silent = TRUE) - f_90 <- function(log_t) (SFORB_fraction(exp(log_t)) - 0.1)^2 - log_DT90 <- try(optimize(f_90, c(log(DT90_b1), log(DT90_b2)))$minimum, - silent = TRUE) - - DT50 = if (inherits(log_DT50, "try-error")) NA - else exp(log_DT50) - DT90 = if (inherits(log_DT90, "try-error")) NA - else exp(log_DT90) - - for (k_out_name in k_out_names) - { - ep$ff[[sub("k_", "", k_out_name)]] = parms.all[[k_out_name]] / k_1output - } - - # Return the eigenvalues for comparison with DFOP rate constants - ep$SFORB[[paste(obs_var, "b1", sep="_")]] = b1 - ep$SFORB[[paste(obs_var, "b2", sep="_")]] = b2 - - ep$distimes[obs_var, c(paste("DT50", obs_var, "b1", sep = "_"))] = DT50_b1 - ep$distimes[obs_var, c(paste("DT50", obs_var, "b2", sep = "_"))] = DT50_b2 - } - if (type == "logistic") { - # FOCUS kinetics (2014) p. 67 - kmax = parms.all["kmax"] - k0 = parms.all["k0"] - r = parms.all["r"] - DT50 = (1/r) * log(1 - ((kmax/k0) * (1 - 2^(r/kmax)))) - DT90 = (1/r) * log(1 - ((kmax/k0) * (1 - 10^(r/kmax)))) - - DT50_k0 = log(2)/k0 - DT50_kmax = log(2)/kmax - ep$distimes[obs_var, c("DT50_k0")] = DT50_k0 - ep$distimes[obs_var, c("DT50_kmax")] = DT50_kmax - } - ep$distimes[obs_var, c("DT50", "DT90")] = c(DT50, DT90) - } - return(ep) -} +#' Function to calculate endpoints for further use from kinetic models fitted +#' with mkinfit +#' +#' This function calculates DT50 and DT90 values as well as formation fractions +#' from kinetic models fitted with mkinfit. If the SFORB model was specified +#' for one of the parents or metabolites, the Eigenvalues are returned. These +#' are equivalent to the rate constantes of the DFOP model, but with the +#' advantage that the SFORB model can also be used for metabolites. +#' +#' @param fit An object of class \code{\link{mkinfit}}. +#' @importFrom stats optimize +#' @return A list with the components mentioned above. +#' @note The function is used internally by \code{\link{summary.mkinfit}}. +#' @author Johannes Ranke +#' @keywords manip +#' @examples +#' +#' fit <- mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE) +#' endpoints(fit) +#' +#' @export +endpoints <- function(fit) { + # Calculate dissipation times DT50 and DT90 and formation + # fractions as well as SFORB eigenvalues from optimised parameters + # Additional DT50 values are calculated from the FOMC DT90 and k1 and k2 from + # HS and DFOP, as well as from Eigenvalues b1 and b2 of any SFORB models + ep <- list() + obs_vars <- fit$obs_vars + parms.all <- c(fit$bparms.optim, fit$bparms.fixed) + ep$ff <- vector() + ep$SFORB <- vector() + ep$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), + DT90 = rep(NA, length(obs_vars)), + row.names = obs_vars) + for (obs_var in obs_vars) { + type = names(fit$mkinmod$map[[obs_var]])[1] + + # Get formation fractions if directly fitted, and calculate remaining fraction to sink + f_names = grep(paste("^f", obs_var, sep = "_"), names(parms.all), value=TRUE) + if (length(f_names) > 0) { + f_values = parms.all[f_names] + f_to_sink = 1 - sum(f_values) + names(f_to_sink) = ifelse(type == "SFORB", + paste(obs_var, "free", "sink", sep = "_"), + paste(obs_var, "sink", sep = "_")) + for (f_name in f_names) { + ep$ff[[sub("f_", "", sub("_to_", "_", f_name))]] = f_values[[f_name]] + } + ep$ff = append(ep$ff, f_to_sink) + } + + # Get the rest + if (type == "SFO") { + k_names = grep(paste("^k", obs_var, sep="_"), names(parms.all), value=TRUE) + k_tot = sum(parms.all[k_names]) + DT50 = log(2)/k_tot + DT90 = log(10)/k_tot + if (fit$mkinmod$use_of_ff == "min") { + for (k_name in k_names) + { + ep$ff[[sub("k_", "", k_name)]] = parms.all[[k_name]] / k_tot + } + } + } + if (type == "FOMC") { + alpha = parms.all["alpha"] + beta = parms.all["beta"] + DT50 = beta * (2^(1/alpha) - 1) + DT90 = beta * (10^(1/alpha) - 1) + DT50_back = DT90 / (log(10)/log(2)) # Backcalculated DT50 as recommended in FOCUS 2011 + ep$distimes[obs_var, c("DT50back")] = DT50_back + } + if (type == "IORE") { + k_names = grep(paste("^k__iore", obs_var, sep="_"), names(parms.all), value=TRUE) + k_tot = sum(parms.all[k_names]) + # From the NAFTA kinetics guidance, p. 5 + n = parms.all[paste("N", obs_var, sep = "_")] + k = k_tot + # Use the initial concentration of the parent compound + source_name = fit$mkinmod$map[[1]][[1]] + c0 = parms.all[paste(source_name, "0", sep = "_")] + alpha = 1 / (n - 1) + beta = (c0^(1 - n))/(k * (n - 1)) + DT50 = beta * (2^(1/alpha) - 1) + DT90 = beta * (10^(1/alpha) - 1) + DT50_back = DT90 / (log(10)/log(2)) # Backcalculated DT50 as recommended in FOCUS 2011 + ep$distimes[obs_var, c("DT50back")] = DT50_back + if (fit$mkinmod$use_of_ff == "min") { + for (k_name in k_names) + { + ep$ff[[sub("k_", "", k_name)]] = parms.all[[k_name]] / k_tot + } + } + } + if (type == "DFOP") { + k1 = parms.all["k1"] + k2 = parms.all["k2"] + g = parms.all["g"] + f <- function(log_t, x) { + t <- exp(log_t) + fraction <- g * exp( - k1 * t) + (1 - g) * exp( - k2 * t) + (fraction - (1 - x/100))^2 + } + DT50_k1 = log(2)/k1 + DT50_k2 = log(2)/k2 + DT90_k1 = log(10)/k1 + DT90_k2 = log(10)/k2 + + DT50 <- try(exp(optimize(f, c(log(DT50_k1), log(DT50_k2)), x=50)$minimum), + silent = TRUE) + DT90 <- try(exp(optimize(f, c(log(DT90_k1), log(DT90_k2)), x=90)$minimum), + silent = TRUE) + if (inherits(DT50, "try-error")) DT50 = NA + if (inherits(DT90, "try-error")) DT90 = NA + + ep$distimes[obs_var, c("DT50_k1")] = DT50_k1 + ep$distimes[obs_var, c("DT50_k2")] = DT50_k2 + } + if (type == "HS") { + k1 = parms.all["k1"] + k2 = parms.all["k2"] + tb = parms.all["tb"] + DTx <- function(x) { + DTx.a <- (log(100/(100 - x)))/k1 + DTx.b <- tb + (log(100/(100 - x)) - k1 * tb)/k2 + if (DTx.a < tb) DTx <- DTx.a + else DTx <- DTx.b + return(DTx) + } + DT50 <- DTx(50) + DT90 <- DTx(90) + DT50_k1 = log(2)/k1 + DT50_k2 = log(2)/k2 + ep$distimes[obs_var, c("DT50_k1")] = DT50_k1 + ep$distimes[obs_var, c("DT50_k2")] = DT50_k2 + } + if (type == "SFORB") { + # FOCUS kinetics (2006), p. 60 f + k_out_names = grep(paste("^k", obs_var, "free", sep="_"), names(parms.all), value=TRUE) + k_out_names = setdiff(k_out_names, paste("k", obs_var, "free", "bound", sep="_")) + k_1output = sum(parms.all[k_out_names]) + k_12 = parms.all[paste("k", obs_var, "free", "bound", sep="_")] + k_21 = parms.all[paste("k", obs_var, "bound", "free", sep="_")] + + sqrt_exp = sqrt(1/4 * (k_12 + k_21 + k_1output)^2 + k_12 * k_21 - (k_12 + k_1output) * k_21) + b1 = 0.5 * (k_12 + k_21 + k_1output) + sqrt_exp + b2 = 0.5 * (k_12 + k_21 + k_1output) - sqrt_exp + + DT50_b1 = log(2)/b1 + DT50_b2 = log(2)/b2 + DT90_b1 = log(10)/b1 + DT90_b2 = log(10)/b2 + + SFORB_fraction = function(t) { + ((k_12 + k_21 - b1)/(b2 - b1)) * exp(-b1 * t) + + ((k_12 + k_21 - b2)/(b1 - b2)) * exp(-b2 * t) + } + + f_50 <- function(log_t) (SFORB_fraction(exp(log_t)) - 0.5)^2 + log_DT50 <- try(optimize(f_50, c(log(DT50_b1), log(DT50_b2)))$minimum, + silent = TRUE) + f_90 <- function(log_t) (SFORB_fraction(exp(log_t)) - 0.1)^2 + log_DT90 <- try(optimize(f_90, c(log(DT90_b1), log(DT90_b2)))$minimum, + silent = TRUE) + + DT50 = if (inherits(log_DT50, "try-error")) NA + else exp(log_DT50) + DT90 = if (inherits(log_DT90, "try-error")) NA + else exp(log_DT90) + + for (k_out_name in k_out_names) + { + ep$ff[[sub("k_", "", k_out_name)]] = parms.all[[k_out_name]] / k_1output + } + + # Return the eigenvalues for comparison with DFOP rate constants + ep$SFORB[[paste(obs_var, "b1", sep="_")]] = b1 + ep$SFORB[[paste(obs_var, "b2", sep="_")]] = b2 + + ep$distimes[obs_var, c(paste("DT50", obs_var, "b1", sep = "_"))] = DT50_b1 + ep$distimes[obs_var, c(paste("DT50", obs_var, "b2", sep = "_"))] = DT50_b2 + } + if (type == "logistic") { + # FOCUS kinetics (2014) p. 67 + kmax = parms.all["kmax"] + k0 = parms.all["k0"] + r = parms.all["r"] + DT50 = (1/r) * log(1 - ((kmax/k0) * (1 - 2^(r/kmax)))) + DT90 = (1/r) * log(1 - ((kmax/k0) * (1 - 10^(r/kmax)))) + + DT50_k0 = log(2)/k0 + DT50_kmax = log(2)/kmax + ep$distimes[obs_var, c("DT50_k0")] = DT50_k0 + ep$distimes[obs_var, c("DT50_kmax")] = DT50_kmax + } + ep$distimes[obs_var, c("DT50", "DT90")] = c(DT50, DT90) + } + return(ep) +} diff --git a/R/ilr.R b/R/ilr.R index 620afc49..e3102cbc 100644 --- a/R/ilr.R +++ b/R/ilr.R @@ -1,6 +1,3 @@ -# Copyright (C) 2012 René Lehmann, Johannes Ranke -# Contact: jranke@uni-bremen.de - # This file is part of the R package mkin # mkin is free software: you can redistribute it and/or modify it under the @@ -16,6 +13,47 @@ # You should have received a copy of the GNU General Public License along with # this program. If not, see +#' Function to perform isometric log-ratio transformation +#' +#' This implementation is a special case of the class of isometric log-ratio +#' transformations. +#' +#' @aliases ilr invilr +#' @param x A numeric vector. Naturally, the forward transformation is only +#' sensible for vectors with all elements being greater than zero. +#' @return The result of the forward or backward transformation. The returned +#' components always sum to 1 for the case of the inverse log-ratio +#' transformation. +#' @author René Lehmann and Johannes Ranke +#' @seealso Another implementation can be found in R package +#' \code{robCompositions}. +#' @references Peter Filzmoser, Karel Hron (2008) Outlier Detection for +#' Compositional Data Using Robust Methods. Math Geosci 40 233-248 +#' @keywords manip +#' @examples +#' +#' # Order matters +#' ilr(c(0.1, 1, 10)) +#' ilr(c(10, 1, 0.1)) +#' # Equal entries give ilr transformations with zeros as elements +#' ilr(c(3, 3, 3)) +#' # Almost equal entries give small numbers +#' ilr(c(0.3, 0.4, 0.3)) +#' # Only the ratio between the numbers counts, not their sum +#' invilr(ilr(c(0.7, 0.29, 0.01))) +#' invilr(ilr(2.1 * c(0.7, 0.29, 0.01))) +#' # Inverse transformation of larger numbers gives unequal elements +#' invilr(-10) +#' invilr(c(-10, 0)) +#' # The sum of the elements of the inverse ilr is 1 +#' sum(invilr(c(-10, 0))) +#' # This is why we do not need all elements of the inverse transformation to go back: +#' a <- c(0.1, 0.3, 0.5) +#' b <- invilr(a) +#' length(b) # Four elements +#' ilr(c(b[1:3], 1 - sum(b[1:3]))) # Gives c(0.1, 0.3, 0.5) +#' +#' @export ilr <- function(x) { z <- vector() for (i in 1:(length(x) - 1)) { @@ -24,6 +62,8 @@ ilr <- function(x) { return(z) } +#' @rdname ilr +#' @export invilr<-function(x) { D <- length(x) + 1 z <- c(x, 0) diff --git a/R/logLik.mkinfit.R b/R/logLik.mkinfit.R index d812f177..4ec3d7d4 100644 --- a/R/logLik.mkinfit.R +++ b/R/logLik.mkinfit.R @@ -1,24 +1,40 @@ -# Copyright (C) 2018,2019 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see +#' Calculated the log-likelihood of a fitted mkinfit object +#' +#' This function simply calculates the product of the likelihood densities +#' calculated using \code{\link{dnorm}}, i.e. assuming normal distribution, +#' with of the mean predicted by the degradation model, and the standard +#' deviation predicted by the error model. +#' +#' The total number of estimated parameters returned with the value of the +#' likelihood is calculated as the sum of fitted degradation model parameters +#' and the fitted error model parameters. +#' +#' @param object An object of class \code{\link{mkinfit}}. +#' @param \dots For compatibility with the generic method +#' @return An object of class \code{\link{logLik}} with the number of estimated +#' parameters (degradation model parameters plus variance model parameters) +#' as attribute. +#' @author Johannes Ranke +#' @seealso Compare the AIC of columns of \code{\link{mmkin}} objects using +#' \code{\link{AIC.mmkin}}. +#' @examples +#' +#' \dontrun{ +#' sfo_sfo <- mkinmod( +#' parent = mkinsub("SFO", to = "m1"), +#' m1 = mkinsub("SFO") +#' ) +#' d_t <- FOCUS_2006_D +#' f_nw <- mkinfit(sfo_sfo, d_t, quiet = TRUE) # no weighting (weights are unity) +#' f_obs <- mkinfit(sfo_sfo, d_t, error_model = "obs", quiet = TRUE) +#' f_tc <- mkinfit(sfo_sfo, d_t, error_model = "tc", quiet = TRUE) +#' AIC(f_nw, f_obs, f_tc) +#' } +#' +#' @export logLik.mkinfit <- function(object, ...) { val <- object$logLik # Number of estimated parameters attr(val, "df") <- length(object$bparms.optim) + length(object$errparms) return(val) } -# vim: set ts=2 sw=2 expandtab: diff --git a/R/logistic.solution.R b/R/logistic.solution.R index a3bddab3..d9db13d7 100644 --- a/R/logistic.solution.R +++ b/R/logistic.solution.R @@ -1,3 +1,58 @@ +#' Logistic kinetics +#' +#' Function describing exponential decline from a defined starting value, with +#' an increasing rate constant, supposedly caused by microbial growth +#' +#' @param t Time. +#' @param parent.0 Starting value for the response variable at time zero. +#' @param kmax Maximum rate constant. +#' @param k0 Minumum rate constant effective at time zero. +#' @param r Growth rate of the increase in the rate constant. +#' @return The value of the response variable at time \code{t}. +#' @note The solution of the logistic model reduces to the +#' \code{\link{SFO.solution}} if \code{k0} is equal to \code{kmax}. +#' @references FOCUS (2014) \dQuote{Generic guidance for Estimating Persistence +#' and Degradation Kinetics from Environmental Fate Studies on Pesticides in +#' EU Registration} Report of the FOCUS Work Group on Degradation Kinetics, +#' Version 1.1, 18 December 2014 +#' \url{http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics} +#' @examples +#' +#' # Reproduce the plot on page 57 of FOCUS (2014) +#' plot(function(x) logistic.solution(x, 100, 0.08, 0.0001, 0.2), +#' from = 0, to = 100, ylim = c(0, 100), +#' xlab = "Time", ylab = "Residue") +#' plot(function(x) logistic.solution(x, 100, 0.08, 0.0001, 0.4), +#' from = 0, to = 100, add = TRUE, lty = 2, col = 2) +#' plot(function(x) logistic.solution(x, 100, 0.08, 0.0001, 0.8), +#' from = 0, to = 100, add = TRUE, lty = 3, col = 3) +#' plot(function(x) logistic.solution(x, 100, 0.08, 0.001, 0.2), +#' from = 0, to = 100, add = TRUE, lty = 4, col = 4) +#' plot(function(x) logistic.solution(x, 100, 0.08, 0.08, 0.2), +#' from = 0, to = 100, add = TRUE, lty = 5, col = 5) +#' legend("topright", inset = 0.05, +#' legend = paste0("k0 = ", c(0.0001, 0.0001, 0.0001, 0.001, 0.08), +#' ", r = ", c(0.2, 0.4, 0.8, 0.2, 0.2)), +#' lty = 1:5, col = 1:5) +#' +#' # Fit with synthetic data +#' logistic <- mkinmod(parent = mkinsub("logistic")) +#' +#' sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) +#' parms_logistic <- c(kmax = 0.08, k0 = 0.0001, r = 0.2) +#' d_logistic <- mkinpredict(logistic, +#' parms_logistic, c(parent = 100), +#' sampling_times) +#' d_2_1 <- add_err(d_logistic, +#' sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), +#' n = 1, reps = 2, digits = 5, LOD = 0.1, seed = 123456)[[1]] +#' +#' m <- mkinfit("logistic", d_2_1, quiet = TRUE) +#' plot_sep(m) +#' summary(m)$bpar +#' endpoints(m)$distimes +#' +#' @export logistic.solution <- function(t, parent.0, kmax, k0, r) { parent = parent.0 * (kmax / (kmax - k0 + k0 * exp (r * t))) ^(kmax/r) diff --git a/R/max_twa_parent.R b/R/max_twa_parent.R index 5129e369..ef3c0ada 100644 --- a/R/max_twa_parent.R +++ b/R/max_twa_parent.R @@ -1,21 +1,43 @@ -# Copyright (C) 2016-2019 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see - +#' Function to calculate maximum time weighted average concentrations from +#' kinetic models fitted with mkinfit +#' +#' This function calculates maximum moving window time weighted average +#' concentrations (TWAs) for kinetic models fitted with \code{\link{mkinfit}}. +#' Currently, only calculations for the parent are implemented for the SFO, +#' FOMC, DFOP and HS models, using the analytical formulas given in the PEC +#' soil section of the FOCUS guidance. +#' +#' @aliases max_twa_parent max_twa_sfo max_twa_fomc max_twa_dfop max_twa_hs +#' @param fit An object of class \code{\link{mkinfit}}. +#' @param windows The width of the time windows for which the TWAs should be +#' calculated. +#' @param M0 The initial concentration for which the maximum time weighted +#' average over the decline curve should be calculated. The default is to use +#' a value of 1, which means that a relative maximum time weighted average +#' factor (f_twa) is calculated. +#' @param k The rate constant in the case of SFO kinetics. +#' @param t The width of the time window. +#' @param alpha Parameter of the FOMC model. +#' @param beta Parameter of the FOMC model. +#' @param k1 The first rate constant of the DFOP or the HS kinetics. +#' @param k2 The second rate constant of the DFOP or the HS kinetics. +#' @param g Parameter of the DFOP model. +#' @param tb Parameter of the HS model. +#' @return For \code{max_twa_parent}, a numeric vector, named using the +#' \code{windows} argument. For the other functions, a numeric vector of +#' length one (also known as 'a number'). +#' @author Johannes Ranke +#' @references FOCUS (2006) \dQuote{Guidance Document on Estimating Persistence +#' and Degradation Kinetics from Environmental Fate Studies on Pesticides in +#' EU Registration} Report of the FOCUS Work Group on Degradation Kinetics, +#' EC Document Reference Sanco/10058/2005 version 2.0, 434 pp, +#' \url{http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics} +#' @examples +#' +#' fit <- mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE) +#' max_twa_parent(fit, c(7, 21)) +#' +#' @export max_twa_parent <- function(fit, windows) { parms.all <- c(fit$bparms.optim, fit$bparms.fixed) obs_vars <- fit$obs_vars @@ -74,15 +96,27 @@ max_twa_parent <- function(fit, windows) { names(res) <- windows return(res) } + +#' @rdname max_twa_parent +#' @export max_twa_sfo <- function(M0 = 1, k, t) { M0 * (1 - exp(- k * t)) / (k * t) } + +#' @rdname max_twa_parent +#' @export max_twa_fomc <- function(M0 = 1, alpha, beta, t) { M0 * (beta)/(t * (1 - alpha)) * ((t/beta + 1)^(1 - alpha) - 1) } + +#' @rdname max_twa_parent +#' @export max_twa_dfop <- function(M0 = 1, k1, k2, g, t) { M0/t * ((g/k1) * (1 - exp(- k1 * t)) + ((1 - g)/k2) * (1 - exp(- k2 * t))) } + +#' @rdname max_twa_parent +#' @export max_twa_hs <- function(M0 = 1, k1, k2, tb, t) { (M0 / t) * ( (1/k1) * (1 - exp(- k1 * tb)) + diff --git a/R/mkin_long_to_wide.R b/R/mkin_long_to_wide.R index 081262f8..0125f2da 100644 --- a/R/mkin_long_to_wide.R +++ b/R/mkin_long_to_wide.R @@ -1,28 +1,29 @@ -# Copyright (C) 2010-2011 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see - -mkin_long_to_wide <- function(long_data, time = "time", outtime = "time") -{ - colnames <- unique(long_data$name) - wide_data <- data.frame(time = subset(long_data, name == colnames[1], time)) - names(wide_data) <- outtime - for (var in colnames) { - wide_data[var] <- subset(long_data, name == var, value) - } - return(wide_data) -} +#' Convert a dataframe from long to wide format +#' +#' This function takes a dataframe in the long form, i.e. with a row for each +#' observed value, and converts it into a dataframe with one independent +#' variable and several dependent variables as columns. +#' +#' @param long_data The dataframe must contain one variable called "time" with +#' the time values specified by the \code{time} argument, one column called +#' "name" with the grouping of the observed values, and finally one column of +#' observed values called "value". +#' @param time The name of the time variable in the long input data. +#' @param outtime The name of the time variable in the wide output data. +#' @return Dataframe in wide format. +#' @author Johannes Ranke +#' @examples +#' +#' mkin_long_to_wide(FOCUS_2006_D) +#' +#' @export mkin_long_to_wide +mkin_long_to_wide <- function(long_data, time = "time", outtime = "time") +{ + colnames <- unique(long_data$name) + wide_data <- data.frame(time = subset(long_data, name == colnames[1], time)) + names(wide_data) <- outtime + for (var in colnames) { + wide_data[var] <- subset(long_data, name == var, value) + } + return(wide_data) +} diff --git a/R/mkin_wide_to_long.R b/R/mkin_wide_to_long.R index 21a7cbe9..bef0e408 100644 --- a/R/mkin_wide_to_long.R +++ b/R/mkin_wide_to_long.R @@ -1,34 +1,34 @@ -# $Id$ - -# Copyright (C) 2010-2013 Johannes Ranke -# Contact: mkin-devel@lists.berlios.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see -if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value")) - -mkin_wide_to_long <- function(wide_data, time = "t") -{ - colnames <- names(wide_data) - if (!(time %in% colnames)) stop("The data in wide format have to contain a variable named ", time, ".") - vars <- subset(colnames, colnames != time) - n <- length(colnames) - 1 - long_data <- data.frame( - name = rep(vars, each = length(wide_data[[time]])), - time = as.numeric(rep(wide_data[[time]], n)), - value = as.numeric(unlist(wide_data[vars])), - row.names = NULL) - return(long_data) -} +if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value")) + +#' Convert a dataframe with observations over time into long format +#' +#' This function simply takes a dataframe with one independent variable and +#' several dependent variable and converts it into the long form as required by +#' \code{\link{mkinfit}}. +#' +#' @param wide_data The dataframe must contain one variable with the time +#' values specified by the \code{time} argument and usually more than one +#' column of observed values. +#' @param time The name of the time variable. +#' @return Dataframe in long format as needed for \code{\link{mkinfit}}. +#' @author Johannes Ranke +#' @keywords manip +#' @examples +#' +#' wide <- data.frame(t = c(1,2,3), x = c(1,4,7), y = c(3,4,5)) +#' mkin_wide_to_long(wide) +#' +#' @export +mkin_wide_to_long <- function(wide_data, time = "t") +{ + colnames <- names(wide_data) + if (!(time %in% colnames)) stop("The data in wide format have to contain a variable named ", time, ".") + vars <- subset(colnames, colnames != time) + n <- length(colnames) - 1 + long_data <- data.frame( + name = rep(vars, each = length(wide_data[[time]])), + time = as.numeric(rep(wide_data[[time]], n)), + value = as.numeric(unlist(wide_data[vars])), + row.names = NULL) + return(long_data) +} diff --git a/R/mkinds.R b/R/mkinds.R index 5333ca26..a66adb14 100644 --- a/R/mkinds.R +++ b/R/mkinds.R @@ -1,21 +1,33 @@ -# Copyright (C) 2015,2018,2019 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see - +#' A dataset class for mkin +#' +#' A dataset class for mkin +#' +#' @name mkinds +#' @docType class +#' @format An \code{\link{R6Class}} generator object. +#' @section Fields: +#' +#' \describe{ \item{list("title")}{A full title for the dataset} +#' +#' \item{list("sampling")}{times The sampling times} +#' +#' \item{list("time_unit")}{The time unit} +#' +#' \item{list("observed")}{Names of the observed compounds} +#' +#' \item{list("unit")}{The unit of the observations} +#' +#' \item{list("replicates")}{The number of replicates} +#' +#' \item{list("data")}{A dataframe with at least the columns name, time and +#' value in order to be compatible with mkinfit} } +#' @importFrom R6 R6Class +#' @keywords datasets +#' @examples +#' +#' mds <- mkinds$new("FOCUS A", FOCUS_2006_A) +#' +#' @export mkinds <- R6Class("mkinds", public = list( title = NULL, @@ -42,6 +54,13 @@ mkinds <- R6Class("mkinds", ) ) +#' Print mkinds objects +#' +#' Print mkinds objects. +#' +#' @param x An \code{\link{mkinds}} object. +#' @param \dots Not used. +#' @export print.mkinds <- function(x, ...) { cat(" with $title: ", x$title, "\n") cat("Observed compounds $observed: ", paste(x$observed, collapse = ", "), "\n") diff --git a/R/mkinerrmin.R b/R/mkinerrmin.R index ce4877d2..0b647b81 100644 --- a/R/mkinerrmin.R +++ b/R/mkinerrmin.R @@ -1,22 +1,43 @@ -# Copyright (C) 2010-2019 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "value_mean")) +#' Calculate the minimum error to assume in order to pass the variance test +#' +#' This function finds the smallest relative error still resulting in passing +#' the chi-squared test as defined in the FOCUS kinetics report from 2006. +#' +#' This function is used internally by \code{\link{summary.mkinfit}}. +#' +#' @param fit an object of class \code{\link{mkinfit}}. +#' @param alpha The confidence level chosen for the chi-squared test. +#' @importFrom stats qchisq aggregate +#' @return A dataframe with the following components: \item{err.min}{The +#' relative error, expressed as a fraction.} \item{n.optim}{The number of +#' optimised parameters attributed to the data series.} \item{df}{The number of +#' remaining degrees of freedom for the chi2 error level calculations. Note +#' that mean values are used for the chi2 statistic and therefore every time +#' point with observed values in the series only counts one time.} The +#' dataframe has one row for the total dataset and one further row for each +#' observed state variable in the model. +#' @references FOCUS (2006) \dQuote{Guidance Document on Estimating Persistence +#' and Degradation Kinetics from Environmental Fate Studies on Pesticides in EU +#' Registration} Report of the FOCUS Work Group on Degradation Kinetics, EC +#' Document Reference Sanco/10058/2005 version 2.0, 434 pp, +#' \url{http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics} +#' @keywords manip +#' @examples +#' +#' SFO_SFO = mkinmod(parent = mkinsub("SFO", to = "m1"), +#' m1 = mkinsub("SFO"), +#' use_of_ff = "max") +#' +#' fit_FOCUS_D = mkinfit(SFO_SFO, FOCUS_2006_D, quiet = TRUE) +#' round(mkinerrmin(fit_FOCUS_D), 4) +#' \dontrun{ +#' fit_FOCUS_E = mkinfit(SFO_SFO, FOCUS_2006_E, quiet = TRUE) +#' round(mkinerrmin(fit_FOCUS_E), 4) +#' } +#' +#' @export mkinerrmin <- function(fit, alpha = 0.05) { parms.optim <- fit$par diff --git a/R/mkinerrplot.R b/R/mkinerrplot.R index 6153a3c0..36e22a43 100644 --- a/R/mkinerrplot.R +++ b/R/mkinerrplot.R @@ -1,22 +1,44 @@ -# Copyright (C) 2008-2014,2019 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see if(getRversion() >= '2.15.1') utils::globalVariables(c("variable", "residual")) +#' Function to plot squared residuals and the error model for an mkin object +#' +#' This function plots the squared residuals for the specified subset of the +#' observed variables from an mkinfit object. In addition, one or more dashed +#' line(s) show the fitted error model. A combined plot of the fitted model +#' and this error model plot can be obtained with \code{\link{plot.mkinfit}} +#' using the argument \code{show_errplot = TRUE}. +#' +#' @param object A fit represented in an \code{\link{mkinfit}} object. +#' @param obs_vars A character vector of names of the observed variables for +#' which residuals should be plotted. Defaults to all observed variables in +#' the model +#' @param xlim plot range in x direction. +#' @param xlab Label for the x axis. +#' @param ylab Label for the y axis. +#' @param maxy Maximum value of the residuals. This is used for the scaling of +#' the y axis and defaults to "auto". +#' @param legend Should a legend be plotted? +#' @param lpos Where should the legend be placed? Default is "topright". Will +#' be passed on to \code{\link{legend}}. +#' @param col_obs Colors for the observed variables. +#' @param pch_obs Symbols to be used for the observed variables. +#' @param frame Should a frame be drawn around the plots? +#' @param \dots further arguments passed to \code{\link{plot}}. +#' @return Nothing is returned by this function, as it is called for its side +#' effect, namely to produce a plot. +#' @author Johannes Ranke +#' @seealso \code{\link{mkinplot}}, for a way to plot the data and the fitted +#' lines of the mkinfit object. +#' @keywords hplot +#' @examples +#' +#' \dontrun{ +#' model <- mkinmod(parent = mkinsub("SFO", "m1"), m1 = mkinsub("SFO")) +#' fit <- mkinfit(model, FOCUS_2006_D, error_model = "tc", quiet = TRUE) +#' mkinerrplot(fit) +#' } +#' +#' @export mkinerrplot <- function (object, obs_vars = names(object$mkinmod$map), xlim = c(0, 1.1 * max(object$data$predicted)), diff --git a/R/mkinfit.R b/R/mkinfit.R index d182f5d0..17fd59d0 100644 --- a/R/mkinfit.R +++ b/R/mkinfit.R @@ -1,904 +1,897 @@ -# Copyright (C) 2010-2019 Johannes Ranke -# Portions of this code are copyright (C) 2013 Eurofins Regulatory AG -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see -if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value")) - -mkinfit <- function(mkinmod, observed, - parms.ini = "auto", - state.ini = "auto", - err.ini = "auto", - fixed_parms = NULL, - fixed_initials = names(mkinmod$diffs)[-1], - from_max_mean = FALSE, - solution_type = c("auto", "analytical", "eigen", "deSolve"), - method.ode = "lsoda", - use_compiled = "auto", - control = list(eval.max = 300, iter.max = 200), - transform_rates = TRUE, - transform_fractions = TRUE, - quiet = FALSE, - atol = 1e-8, rtol = 1e-10, n.outtimes = 100, - error_model = c("const", "obs", "tc"), - error_model_algorithm = c("auto", "d_3", "direct", "twostep", "threestep", "fourstep", "IRLS", "OLS"), - reweight.tol = 1e-8, reweight.max.iter = 10, - trace_parms = FALSE, - ...) -{ - # Check mkinmod and generate a model for the variable whith the highest value - # if a suitable string is given - parent_models_available = c("SFO", "FOMC", "DFOP", "HS", "SFORB", "IORE", "logistic") - if (class(mkinmod) != "mkinmod") { - presumed_parent_name = observed[which.max(observed$value), "name"] - if (mkinmod[[1]] %in% parent_models_available) { - speclist <- list(list(type = mkinmod, sink = TRUE)) - names(speclist) <- presumed_parent_name - mkinmod <- mkinmod(speclist = speclist) - } else { - stop("Argument mkinmod must be of class mkinmod or a string containing one of\n ", - paste(parent_models_available, collapse = ", ")) - } - } - - # Get the names of the state variables in the model - mod_vars <- names(mkinmod$diffs) - - # Get the names of observed variables - obs_vars <- names(mkinmod$spec) - - # Subset observed data with names of observed data in the model and remove NA values - observed <- subset(observed, name %in% obs_vars) - observed <- subset(observed, !is.na(value)) - - # Also remove zero values to avoid instabilities (e.g. of the 'tc' error model) - if (any(observed$value == 0)) { - warning("Observations with value of zero were removed from the data") - observed <- subset(observed, value != 0) - } - - # Obtain data for decline from maximum mean value if requested - if (from_max_mean) { - # This is only used for simple decline models - if (length(obs_vars) > 1) - stop("Decline from maximum is only implemented for models with a single observed variable") - - means <- aggregate(value ~ time, data = observed, mean, na.rm=TRUE) - t_of_max <- means[which.max(means$value), "time"] - observed <- subset(observed, time >= t_of_max) - observed$time <- observed$time - t_of_max - } - - # Number observations used for fitting - n_observed <- nrow(observed) - - # Define starting values for parameters where not specified by the user - if (parms.ini[[1]] == "auto") parms.ini = vector() - - # Warn for inital parameter specifications that are not in the model - wrongpar.names <- setdiff(names(parms.ini), mkinmod$parms) - if (length(wrongpar.names) > 0) { - warning("Initial parameter(s) ", paste(wrongpar.names, collapse = ", "), - " not used in the model") - parms.ini <- parms.ini[setdiff(names(parms.ini), wrongpar.names)] - } - - # Warn that the sum of formation fractions may exceed one if they are not - # fitted in the transformed way - if (mkinmod$use_of_ff == "max" & transform_fractions == FALSE) { - warning("The sum of formation fractions may exceed one if you do not use ", - "transform_fractions = TRUE." ) - for (box in mod_vars) { - # Stop if formation fractions are not transformed and we have no sink - if (mkinmod$spec[[box]]$sink == FALSE) { - stop("If formation fractions are not transformed during the fitting, ", - "it is not supported to turn off pathways to sink.\n ", - "Consider turning on the transformation of formation fractions or ", - "setting up a model with use_of_ff = 'min'.\n") - } - } - } - - # Do not allow fixing formation fractions if we are using the ilr transformation, - # this is not supported - if (transform_fractions == TRUE && length(fixed_parms) > 0) { - if (any(grepl("^f_", fixed_parms))) { - stop("Fixing formation fractions is not supported when using the ilr ", - "transformation.") - } - } - - # Set initial parameter values, including a small increment (salt) - # to avoid linear dependencies (singular matrix) in Eigenvalue based solutions - k_salt = 0 - defaultpar.names <- setdiff(mkinmod$parms, names(parms.ini)) - for (parmname in defaultpar.names) { - # Default values for rate constants, depending on the parameterisation - if (grepl("^k", parmname)) { - parms.ini[parmname] = 0.1 + k_salt - k_salt = k_salt + 1e-4 - } - # Default values for rate constants for reversible binding - if (grepl("free_bound$", parmname)) parms.ini[parmname] = 0.1 - if (grepl("bound_free$", parmname)) parms.ini[parmname] = 0.02 - # Default values for IORE exponents - if (grepl("^N", parmname)) parms.ini[parmname] = 1.1 - # Default values for the FOMC, DFOP and HS models - if (parmname == "alpha") parms.ini[parmname] = 1 - if (parmname == "beta") parms.ini[parmname] = 10 - if (parmname == "k1") parms.ini[parmname] = 0.1 - if (parmname == "k2") parms.ini[parmname] = 0.01 - if (parmname == "tb") parms.ini[parmname] = 5 - if (parmname == "g") parms.ini[parmname] = 0.5 - if (parmname == "kmax") parms.ini[parmname] = 0.1 - if (parmname == "k0") parms.ini[parmname] = 0.0001 - if (parmname == "r") parms.ini[parmname] = 0.2 - } - # Default values for formation fractions in case they are present - for (box in mod_vars) { - f_names <- mkinmod$parms[grep(paste0("^f_", box), mkinmod$parms)] - if (length(f_names) > 0) { - # We need to differentiate between default and specified fractions - # and set the unspecified to 1 - sum(specified)/n_unspecified - f_default_names <- intersect(f_names, defaultpar.names) - f_specified_names <- setdiff(f_names, defaultpar.names) - sum_f_specified = sum(parms.ini[f_specified_names]) - if (sum_f_specified > 1) { - stop("Starting values for the formation fractions originating from ", - box, " sum up to more than 1.") - } - if (mkinmod$spec[[box]]$sink) n_unspecified = length(f_default_names) + 1 - else { - n_unspecified = length(f_default_names) - } - parms.ini[f_default_names] <- (1 - sum_f_specified) / n_unspecified - } - } - - # Set default for state.ini if appropriate - parent_name = names(mkinmod$spec)[[1]] - if (state.ini[1] == "auto") { - parent_time_0 = subset(observed, time == 0 & name == parent_name)$value - parent_time_0_mean = mean(parent_time_0, na.rm = TRUE) - if (is.na(parent_time_0_mean)) { - state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)) - } else { - state.ini = c(parent_time_0_mean, rep(0, length(mkinmod$diffs) - 1)) - } - } - - # Name the inital state variable values if they are not named yet - if(is.null(names(state.ini))) names(state.ini) <- mod_vars - - # Transform initial parameter values for fitting - transparms.ini <- transform_odeparms(parms.ini, mkinmod, - transform_rates = transform_rates, - transform_fractions = transform_fractions) - - # Parameters to be optimised: - # Kinetic parameters in parms.ini whose names are not in fixed_parms - parms.fixed <- parms.ini[fixed_parms] - parms.optim <- parms.ini[setdiff(names(parms.ini), fixed_parms)] - - transparms.fixed <- transform_odeparms(parms.fixed, mkinmod, - transform_rates = transform_rates, - transform_fractions = transform_fractions) - transparms.optim <- transform_odeparms(parms.optim, mkinmod, - transform_rates = transform_rates, - transform_fractions = transform_fractions) - - # Inital state variables in state.ini whose names are not in fixed_initials - state.ini.fixed <- state.ini[fixed_initials] - state.ini.optim <- state.ini[setdiff(names(state.ini), fixed_initials)] - - # Preserve names of state variables before renaming initial state variable - # parameters - state.ini.optim.boxnames <- names(state.ini.optim) - state.ini.fixed.boxnames <- names(state.ini.fixed) - if(length(state.ini.optim) > 0) { - names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") - } - if(length(state.ini.fixed) > 0) { - names(state.ini.fixed) <- paste(names(state.ini.fixed), "0", sep="_") - } - - # Decide if the solution of the model can be based on a simple analytical - # formula, the spectral decomposition of the matrix (fundamental system) - # or a numeric ode solver from the deSolve package - # Prefer deSolve over eigen if a compiled model is present and use_compiled - # is not set to FALSE - solution_type = match.arg(solution_type) - if (solution_type == "analytical" && length(mkinmod$spec) > 1) - stop("Analytical solution not implemented for models with metabolites.") - if (solution_type == "eigen" && !is.matrix(mkinmod$coefmat)) - stop("Eigenvalue based solution not possible, coefficient matrix not present.") - if (solution_type == "auto") { - if (length(mkinmod$spec) == 1) { - solution_type = "analytical" - } else { - if (!is.null(mkinmod$cf) & use_compiled[1] != FALSE) { - solution_type = "deSolve" - } else { - if (is.matrix(mkinmod$coefmat)) { - solution_type = "eigen" - if (max(observed$value, na.rm = TRUE) < 0.1) { - stop("The combination of small observed values (all < 0.1) and solution_type = eigen is error-prone") - } - } else { - solution_type = "deSolve" - } - } - } - } - - # Get the error model and the algorithm for fitting - err_mod <- match.arg(error_model) - error_model_algorithm = match.arg(error_model_algorithm) - if (error_model_algorithm == "OLS") { - if (err_mod != "const") stop("OLS is only appropriate for constant variance") - } - if (error_model_algorithm == "auto") { - error_model_algorithm = switch(err_mod, - const = "OLS", obs = "d_3", tc = "d_3") - } - errparm_names <- switch(err_mod, - "const" = "sigma", - "obs" = paste0("sigma_", obs_vars), - "tc" = c("sigma_low", "rsd_high")) - errparm_names_optim <- if (error_model_algorithm == "OLS") NULL else errparm_names - - # Define starting values for the error model - if (err.ini[1] != "auto") { - if (!identical(names(err.ini), errparm_names)) { - stop("Please supply initial values for error model components ", paste(errparm_names, collapse = ", ")) - } else { - errparms = err.ini - } - } else { - if (err_mod == "const") { - errparms = 3 - } - if (err_mod == "obs") { - errparms = rep(3, length(obs_vars)) - } - if (err_mod == "tc") { - errparms <- c(sigma_low = 0.1, rsd_high = 0.1) - } - names(errparms) <- errparm_names - } - if (error_model_algorithm == "OLS") { - errparms_optim <- NULL - } else { - errparms_optim <- errparms - } - - # Define outtimes for model solution. - # Include time points at which observed data are available - outtimes = sort(unique(c(observed$time, seq(min(observed$time), - max(observed$time), - length.out = n.outtimes)))) - - # Define the objective function for optimisation, including (back)transformations - cost_function <- function(P, trans = TRUE, OLS = FALSE, fixed_degparms = FALSE, fixed_errparms = FALSE, update_data = TRUE, ...) - { - assign("calls", calls + 1, inherits = TRUE) # Increase the model solution counter - - # Trace parameter values if requested and if we are actually optimising - if(trace_parms & update_data) cat(P, "\n") - - # Determine local parameter values for the cost estimation - if (is.numeric(fixed_degparms)) { - cost_degparms <- fixed_degparms - cost_errparms <- P - degparms_fixed = TRUE - } else { - degparms_fixed = FALSE - } - - if (is.numeric(fixed_errparms)) { - cost_degparms <- P - cost_errparms <- fixed_errparms - errparms_fixed = TRUE - } else { - errparms_fixed = FALSE - } - - if (OLS) { - cost_degparms <- P - cost_errparms <- numeric(0) - } - - if (!OLS & !degparms_fixed & !errparms_fixed) { - cost_degparms <- P[1:(length(P) - length(errparms))] - cost_errparms <- P[(length(cost_degparms) + 1):length(P)] - } - - # Initial states for t0 - if(length(state.ini.optim) > 0) { - odeini <- c(cost_degparms[1:length(state.ini.optim)], state.ini.fixed) - names(odeini) <- c(state.ini.optim.boxnames, state.ini.fixed.boxnames) - } else { - odeini <- state.ini.fixed - names(odeini) <- state.ini.fixed.boxnames - } - - odeparms.optim <- cost_degparms[(length(state.ini.optim) + 1):length(cost_degparms)] - - if (trans == TRUE) { - odeparms <- c(odeparms.optim, transparms.fixed) - parms <- backtransform_odeparms(odeparms, mkinmod, - transform_rates = transform_rates, - transform_fractions = transform_fractions) - } else { - parms <- c(odeparms.optim, parms.fixed) - } - - # Solve the system with current parameter values - out <- mkinpredict(mkinmod, parms, - odeini, outtimes, - solution_type = solution_type, - use_compiled = use_compiled, - method.ode = method.ode, - atol = atol, rtol = rtol, ...) - - out_long <- mkin_wide_to_long(out, time = "time") - - if (err_mod == "const") { - observed$std <- if (OLS) NA else cost_errparms["sigma"] - } - if (err_mod == "obs") { - std_names <- paste0("sigma_", observed$name) - observed$std <- cost_errparms[std_names] - } - if (err_mod == "tc") { - tmp <- merge(observed, out_long, by = c("time", "name")) - tmp$name <- ordered(tmp$name, levels = obs_vars) - tmp <- tmp[order(tmp$name, tmp$time), ] - observed$std <- sqrt(cost_errparms["sigma_low"]^2 + tmp$value.y^2 * cost_errparms["rsd_high"]^2) - } - - cost_data <- merge(observed[c("name", "time", "value", "std")], out_long, - by = c("name", "time"), suffixes = c(".observed", ".predicted")) - - if (OLS) { - # Cost is the sum of squared residuals - cost <- with(cost_data, sum((value.observed - value.predicted)^2)) - } else { - # Cost is the negative log-likelihood - cost <- - with(cost_data, - sum(dnorm(x = value.observed, mean = value.predicted, sd = std, log = TRUE))) - } - - # We update the current cost and data during the optimisation, not - # during hessian calculations - if (update_data) { - - assign("out_predicted", out_long, inherits = TRUE) - assign("current_data", cost_data, inherits = TRUE) - - if (cost < cost.current) { - assign("cost.current", cost, inherits = TRUE) - if (!quiet) cat(ifelse(OLS, "Sum of squared residuals", "Negative log-likelihood"), - " at call ", calls, ": ", cost.current, "\n", sep = "") - } - } - return(cost) - } - - names_optim <- c(names(state.ini.optim), - names(transparms.optim), - errparm_names_optim) - n_optim <- length(names_optim) - - # Define lower and upper bounds other than -Inf and Inf for parameters - # for which no internal transformation is requested in the call to mkinfit - # and for optimised error model parameters - lower <- rep(-Inf, n_optim) - upper <- rep(Inf, n_optim) - names(lower) <- names(upper) <- names_optim - - # IORE exponents are not transformed, but need a lower bound - index_N <- grep("^N", names(lower)) - lower[index_N] <- 0 - - if (!transform_rates) { - index_k <- grep("^k_", names(lower)) - lower[index_k] <- 0 - index_k__iore <- grep("^k__iore_", names(lower)) - lower[index_k__iore] <- 0 - other_rate_parms <- intersect(c("alpha", "beta", "k1", "k2", "tb", "r"), names(lower)) - lower[other_rate_parms] <- 0 - } - - if (!transform_fractions) { - index_f <- grep("^f_", names(upper)) - lower[index_f] <- 0 - upper[index_f] <- 1 - other_fraction_parms <- intersect(c("g"), names(upper)) - lower[other_fraction_parms] <- 0 - upper[other_fraction_parms] <- 1 - } - - if (err_mod == "const") { - if (error_model_algorithm != "OLS") { - lower["sigma"] <- 0 - } - } - if (err_mod == "obs") { - index_sigma <- grep("^sigma_", names(lower)) - lower[index_sigma] <- 0 - } - if (err_mod == "tc") { - lower["sigma_low"] <- 0 - lower["rsd_high"] <- 0 - } - - # Counter for cost function evaluations - calls = 0 - cost.current <- Inf - out_predicted <- NA - current_data <- NA - - # Show parameter names if tracing is requested - if(trace_parms) cat(names_optim, "\n") - - # browser() - - # Do the fit and take the time until the hessians are calculated - fit_time <- system.time({ - degparms <- c(state.ini.optim, transparms.optim) - n_degparms <- length(degparms) - degparms_index <- seq(1, n_degparms) - errparms_index <- seq(n_degparms + 1, length.out = length(errparms)) - - if (error_model_algorithm == "d_3") { - if (!quiet) message("Directly optimising the complete model") - parms.start <- c(degparms, errparms) - fit_direct <- nlminb(parms.start, cost_function, - lower = lower[names(parms.start)], - upper = upper[names(parms.start)], - control = control, ...) - fit_direct$logLik <- - cost.current - if (error_model_algorithm == "direct") { - degparms <- fit_direct$par[degparms_index] - errparms <- fit_direct$par[errparms_index] - } else { - cost.current <- Inf # reset to avoid conflict with the OLS step - } - } - if (error_model_algorithm != "direct") { - if (!quiet) message("Ordinary least squares optimisation") - fit <- nlminb(degparms, cost_function, control = control, - lower = lower[names(degparms)], - upper = upper[names(degparms)], OLS = TRUE, ...) - degparms <- fit$par - - # Get the maximum likelihood estimate for sigma at the optimum parameter values - current_data$residual <- current_data$value.observed - current_data$value.predicted - sigma_mle <- sqrt(sum(current_data$residual^2)/nrow(current_data)) - - # Use that estimate for the constant variance, or as first guess if err_mod = "obs" - if (err_mod != "tc") { - errparms[names(errparms)] <- sigma_mle - } - fit$par <- c(fit$par, errparms) - - cost.current <- cost_function(c(degparms, errparms), OLS = FALSE) - fit$logLik <- - cost.current - } - if (error_model_algorithm %in% c("threestep", "fourstep", "d_3")) { - if (!quiet) message("Optimising the error model") - fit <- nlminb(errparms, cost_function, control = control, - lower = lower[names(errparms)], - upper = upper[names(errparms)], - fixed_degparms = degparms, ...) - errparms <- fit$par - } - if (error_model_algorithm == "fourstep") { - if (!quiet) message("Optimising the degradation model") - fit <- nlminb(degparms, cost_function, control = control, - lower = lower[names(degparms)], - upper = upper[names(degparms)], - fixed_errparms = errparms, ...) - degparms <- fit$par - } - if (error_model_algorithm %in% - c("direct", "twostep", "threestep", "fourstep", "d_3")) { - if (!quiet) message("Optimising the complete model") - parms.start <- c(degparms, errparms) - fit <- nlminb(parms.start, cost_function, - lower = lower[names(parms.start)], - upper = upper[names(parms.start)], - control = control, ...) - degparms <- fit$par[degparms_index] - errparms <- fit$par[errparms_index] - fit$logLik <- - cost.current - - if (error_model_algorithm == "d_3") { - d_3_messages = c( - same = "Direct fitting and three-step fitting yield approximately the same likelihood", - threestep = "Three-step fitting yielded a higher likelihood than direct fitting", - direct = "Direct fitting yielded a higher likelihood than three-step fitting") - rel_diff <- abs((fit_direct$logLik - fit$logLik))/-mean(c(fit_direct$logLik, fit$logLik)) - if (rel_diff < 0.0001) { - if (!quiet) message(d_3_messages["same"]) - fit$d_3_message <- d_3_messages["same"] - } else { - if (fit$logLik > fit_direct$logLik) { - if (!quiet) message(d_3_messages["threestep"]) - fit$d_3_message <- d_3_messages["threestep"] - } else { - if (!quiet) message(d_3_messages["direct"]) - fit <- fit_direct - fit$d_3_message <- d_3_messages["direct"] - } - } - } - } - if (err_mod != "const" & error_model_algorithm == "IRLS") { - reweight.diff <- 1 - n.iter <- 0 - errparms_last <- errparms - - while (reweight.diff > reweight.tol & - n.iter < reweight.max.iter) { - - if (!quiet) message("Optimising the error model") - fit <- nlminb(errparms, cost_function, control = control, - lower = lower[names(errparms)], - upper = upper[names(errparms)], - fixed_degparms = degparms, ...) - errparms <- fit$par - - if (!quiet) message("Optimising the degradation model") - fit <- nlminb(degparms, cost_function, control = control, - lower = lower[names(degparms)], - upper = upper[names(degparms)], - fixed_errparms = errparms, ...) - degparms <- fit$par - - reweight.diff <- dist(rbind(errparms, errparms_last)) - errparms_last <- errparms - - fit$par <- c(fit$par, errparms) - cost.current <- cost_function(c(degparms, errparms), OLS = FALSE) - fit$logLik <- - cost.current - } - } - - fit$hessian <- try(numDeriv::hessian(cost_function, c(degparms, errparms), OLS = FALSE, - update_data = FALSE), silent = TRUE) - - # Backtransform parameters - bparms.optim = backtransform_odeparms(fit$par, mkinmod, - transform_rates = transform_rates, - transform_fractions = transform_fractions) - bparms.fixed = c(state.ini.fixed, parms.fixed) - bparms.all = c(bparms.optim, parms.fixed) - - fit$hessian_notrans <- try(numDeriv::hessian(cost_function, c(bparms.all, errparms), - OLS = FALSE, trans = FALSE, update_data = FALSE), silent = TRUE) - }) - - fit$error_model_algorithm <- error_model_algorithm - - if (fit$convergence != 0) { - fit$warning = paste0("Optimisation did not converge:\n", fit$message) - warning(fit$warning) - } else { - if(!quiet) message("Optimisation successfully terminated.\n") - } - - # We need to return some more data for summary and plotting - fit$solution_type <- solution_type - fit$transform_rates <- transform_rates - fit$transform_fractions <- transform_fractions - fit$reweight.tol <- reweight.tol - fit$reweight.max.iter <- reweight.max.iter - fit$control <- control - fit$calls <- calls - fit$time <- fit_time - - # We also need the model for summary and plotting - fit$mkinmod <- mkinmod - - # We need data and predictions for summary and plotting - fit$observed <- observed - fit$obs_vars <- obs_vars - fit$predicted <- out_predicted - - # Residual sum of squares as a function of the fitted parameters - fit$rss <- function(P) cost_function(P, OLS = TRUE, update_data = FALSE) - - # Log-likelihood with possibility to fix degparms or errparms - fit$ll <- function(P, fixed_degparms = FALSE, fixed_errparms = FALSE) { - - cost_function(P, fixed_degparms = fixed_degparms, - fixed_errparms = fixed_errparms, OLS = FALSE, update_data = FALSE) - } - - # Collect initial parameter values in three dataframes - fit$start <- data.frame(value = c(state.ini.optim, - parms.optim, errparms_optim)) - fit$start$type = c(rep("state", length(state.ini.optim)), - rep("deparm", length(parms.optim)), - rep("error", length(errparms_optim))) - - fit$start_transformed = data.frame( - value = c(state.ini.optim, transparms.optim, errparms_optim), - lower = lower, - upper = upper) - - fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) - fit$fixed$type = c(rep("state", length(state.ini.fixed)), - rep("deparm", length(parms.fixed))) - - # Sort observed, predicted and residuals - current_data$name <- ordered(current_data$name, levels = obs_vars) - - ordered_data <- current_data[order(current_data$name, current_data$time), ] - - fit$data <- data.frame(time = ordered_data$time, - variable = ordered_data$name, - observed = ordered_data$value.observed, - predicted = ordered_data$value.predicted) - - fit$data$residual <- fit$data$observed - fit$data$predicted - - fit$atol <- atol - fit$rtol <- rtol - fit$err_mod <- err_mod - - # Return different sets of backtransformed parameters for summary and plotting - fit$bparms.optim <- bparms.optim - fit$bparms.fixed <- bparms.fixed - - # Return ode and state parameters for further fitting - fit$bparms.ode <- bparms.all[mkinmod$parms] - fit$bparms.state <- c(bparms.all[setdiff(names(bparms.all), names(fit$bparms.ode))], - state.ini.fixed) - names(fit$bparms.state) <- gsub("_0$", "", names(fit$bparms.state)) - - fit$errparms <- errparms - fit$df.residual <- n_observed - length(c(degparms, errparms)) - - fit$date <- date() - fit$version <- as.character(utils::packageVersion("mkin")) - fit$Rversion <- paste(R.version$major, R.version$minor, sep=".") - - class(fit) <- c("mkinfit", "modFit") - return(fit) -} - -summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, alpha = 0.05, ...) { - param <- object$par - pnames <- names(param) - bpnames <- names(object$bparms.optim) - epnames <- names(object$errparms) - p <- length(param) - mod_vars <- names(object$mkinmod$diffs) - covar <- try(solve(object$hessian), silent = TRUE) - covar_notrans <- try(solve(object$hessian_notrans), silent = TRUE) - rdf <- object$df.residual - - if (!is.numeric(covar) | is.na(covar[1])) { - covar <- NULL - se <- lci <- uci <- rep(NA, p) - } else { - rownames(covar) <- colnames(covar) <- pnames - se <- sqrt(diag(covar)) - lci <- param + qt(alpha/2, rdf) * se - uci <- param + qt(1-alpha/2, rdf) * se - } - - beparms.optim <- c(object$bparms.optim, object$par[epnames]) - if (!is.numeric(covar_notrans) | is.na(covar_notrans[1])) { - covar_notrans <- NULL - se_notrans <- tval <- pval <- rep(NA, p) - } else { - rownames(covar_notrans) <- colnames(covar_notrans) <- c(bpnames, epnames) - se_notrans <- sqrt(diag(covar_notrans)) - tval <- beparms.optim / se_notrans - pval <- pt(abs(tval), rdf, lower.tail = FALSE) - } - - names(se) <- pnames - - param <- cbind(param, se, lci, uci) - dimnames(param) <- list(pnames, c("Estimate", "Std. Error", "Lower", "Upper")) - - bparam <- cbind(Estimate = beparms.optim, se_notrans, - "t value" = tval, "Pr(>t)" = pval, Lower = NA, Upper = NA) - - # Transform boundaries of CI for one parameter at a time, - # with the exception of sets of formation fractions (single fractions are OK). - f_names_skip <- character(0) - for (box in mod_vars) { # Figure out sets of fractions to skip - f_names <- grep(paste("^f", box, sep = "_"), pnames, value = TRUE) - n_paths <- length(f_names) - if (n_paths > 1) f_names_skip <- c(f_names_skip, f_names) - } - - for (pname in pnames) { - if (!pname %in% f_names_skip) { - par.lower <- param[pname, "Lower"] - par.upper <- param[pname, "Upper"] - names(par.lower) <- names(par.upper) <- pname - bpl <- backtransform_odeparms(par.lower, object$mkinmod, - object$transform_rates, - object$transform_fractions) - bpu <- backtransform_odeparms(par.upper, object$mkinmod, - object$transform_rates, - object$transform_fractions) - bparam[names(bpl), "Lower"] <- bpl - bparam[names(bpu), "Upper"] <- bpu - } - } - bparam[epnames, c("Lower", "Upper")] <- param[epnames, c("Lower", "Upper")] - - ans <- list( - version = as.character(utils::packageVersion("mkin")), - Rversion = paste(R.version$major, R.version$minor, sep="."), - date.fit = object$date, - date.summary = date(), - solution_type = object$solution_type, - warning = object$warning, - use_of_ff = object$mkinmod$use_of_ff, - error_model_algorithm = object$error_model_algorithm, - df = c(p, rdf), - covar = covar, - covar_notrans = covar_notrans, - err_mod = object$err_mod, - niter = object$iterations, - calls = object$calls, - time = object$time, - par = param, - bpar = bparam) - - if (!is.null(object$version)) { - ans$fit_version <- object$version - ans$fit_Rversion <- object$Rversion - } - - ans$diffs <- object$mkinmod$diffs - if(data) ans$data <- object$data - ans$start <- object$start - ans$start_transformed <- object$start_transformed - - ans$fixed <- object$fixed - - ans$errmin <- mkinerrmin(object, alpha = 0.05) - - if (object$calls > 0) { - if (!is.null(ans$covar)){ - Corr <- cov2cor(ans$covar) - rownames(Corr) <- colnames(Corr) <- rownames(ans$par) - ans$Corr <- Corr - } else { - warning("Could not calculate correlation; no covariance matrix") - } - } - - ans$bparms.ode <- object$bparms.ode - ep <- endpoints(object) - if (length(ep$ff) != 0) - ans$ff <- ep$ff - if (distimes) ans$distimes <- ep$distimes - if (length(ep$SFORB) != 0) ans$SFORB <- ep$SFORB - if (!is.null(object$d_3_message)) ans$d_3_message <- object$d_3_message - class(ans) <- c("summary.mkinfit", "summary.modFit") - return(ans) -} - -# Expanded from print.summary.modFit -print.summary.mkinfit <- function(x, digits = max(3, getOption("digits") - 3), ...) { - if (is.null(x$fit_version)) { - cat("mkin version: ", x$version, "\n") - cat("R version: ", x$Rversion, "\n") - } else { - cat("mkin version used for fitting: ", x$fit_version, "\n") - cat("R version used for fitting: ", x$fit_Rversion, "\n") - } - - cat("Date of fit: ", x$date.fit, "\n") - cat("Date of summary:", x$date.summary, "\n") - - if (!is.null(x$warning)) cat("\n\nWarning:", x$warning, "\n\n") - - cat("\nEquations:\n") - nice_diffs <- gsub("^(d.*) =", "\\1/dt =", x[["diffs"]]) - writeLines(strwrap(nice_diffs, exdent = 11)) - df <- x$df - rdf <- df[2] - - cat("\nModel predictions using solution type", x$solution_type, "\n") - - cat("\nFitted using", x$calls, "model solutions performed in", x$time[["elapsed"]], "s\n") - - if (!is.null(x$err_mod)) { - cat("\nError model: ") - cat(switch(x$err_mod, - const = "Constant variance", - obs = "Variance unique to each observed variable", - tc = "Two-component variance function"), "\n") - - cat("\nError model algorithm:", x$error_model_algorithm, "\n") - if (!is.null(x$d_3_message)) cat(x$d_3_message, "\n") - } - - cat("\nStarting values for parameters to be optimised:\n") - print(x$start) - - cat("\nStarting values for the transformed parameters actually optimised:\n") - print(x$start_transformed) - - cat("\nFixed parameter values:\n") - if(length(x$fixed$value) == 0) cat("None\n") - else print(x$fixed) - - cat("\nOptimised, transformed parameters with symmetric confidence intervals:\n") - print(signif(x$par, digits = digits)) - - if (x$calls > 0) { - cat("\nParameter correlation:\n") - if (!is.null(x$covar)){ - print(x$Corr, digits = digits, ...) - } else { - cat("No covariance matrix") - } - } - - cat("\nBacktransformed parameters:\n") - cat("Confidence intervals for internally transformed parameters are asymmetric.\n") - if ((x$version) < "0.9-36") { - cat("To get the usual (questionable) t-test, upgrade mkin and repeat the fit.\n") - print(signif(x$bpar, digits = digits)) - } else { - cat("t-test (unrealistically) based on the assumption of normal distribution\n") - cat("for estimators of untransformed parameters.\n") - print(signif(x$bpar[, c(1, 3, 4, 5, 6)], digits = digits)) - } - - cat("\nFOCUS Chi2 error levels in percent:\n") - x$errmin$err.min <- 100 * x$errmin$err.min - print(x$errmin, digits=digits,...) - - printSFORB <- !is.null(x$SFORB) - if(printSFORB){ - cat("\nEstimated Eigenvalues of SFORB model(s):\n") - print(x$SFORB, digits=digits,...) - } - - printff <- !is.null(x$ff) - if(printff){ - cat("\nResulting formation fractions:\n") - print(data.frame(ff = x$ff), digits=digits,...) - } - - printdistimes <- !is.null(x$distimes) - if(printdistimes){ - cat("\nEstimated disappearance times:\n") - print(x$distimes, digits=digits,...) - } - - printdata <- !is.null(x$data) - if (printdata){ - cat("\nData:\n") - print(format(x$data, digits = digits, ...), row.names = FALSE) - } - - invisible(x) -} -# vim: set ts=2 sw=2 expandtab: +if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "time", "value")) + +#' Fit a kinetic model to data with one or more state variables +#' +#' This function maximises the likelihood of the observed data using the Port +#' algorithm \code{\link{nlminb}}, and the specified initial or fixed +#' parameters and starting values. In each step of the optimsation, the +#' kinetic model is solved using the function \code{\link{mkinpredict}}. The +#' parameters of the selected error model are fitted simultaneously with the +#' degradation model parameters, as both of them are arguments of the +#' likelihood function. +#' +#' Per default, parameters in the kinetic models are internally transformed in +#' order to better satisfy the assumption of a normal distribution of their +#' estimators. +#' +#' @param mkinmod A list of class \code{\link{mkinmod}}, containing the kinetic +#' model to be fitted to the data, or one of the shorthand names ("SFO", +#' "FOMC", "DFOP", "HS", "SFORB", "IORE"). If a shorthand name is given, a +#' parent only degradation model is generated for the variable with the +#' highest value in \code{observed}. +#' @param observed A dataframe with the observed data. The first column called +#' "name" must contain the name of the observed variable for each data point. +#' The second column must contain the times of observation, named "time". +#' The third column must be named "value" and contain the observed values. +#' Zero values in the "value" column will be removed, with a warning, in +#' order to avoid problems with fitting the two-component error model. This +#' is not expected to be a problem, because in general, values of zero are +#' not observed in degradation data, because there is a lower limit of +#' detection. +#' @param parms.ini A named vector of initial values for the parameters, +#' including parameters to be optimised and potentially also fixed parameters +#' as indicated by \code{fixed_parms}. If set to "auto", initial values for +#' rate constants are set to default values. Using parameter names that are +#' not in the model gives an error. +#' +#' It is possible to only specify a subset of the parameters that the model +#' needs. You can use the parameter lists "bparms.ode" from a previously +#' fitted model, which contains the differential equation parameters from +#' this model. This works nicely if the models are nested. An example is +#' given below. +#' @param state.ini A named vector of initial values for the state variables of +#' the model. In case the observed variables are represented by more than one +#' model variable, the names will differ from the names of the observed +#' variables (see \code{map} component of \code{\link{mkinmod}}). The default +#' is to set the initial value of the first model variable to the mean of the +#' time zero values for the variable with the maximum observed value, and all +#' others to 0. If this variable has no time zero observations, its initial +#' value is set to 100. +#' @param err.ini A named vector of initial values for the error model +#' parameters to be optimised. If set to "auto", initial values are set to +#' default values. Otherwise, inital values for all error model parameters +#' must be given. +#' @param fixed_parms The names of parameters that should not be optimised but +#' rather kept at the values specified in \code{parms.ini}. +#' @param fixed_initials The names of model variables for which the initial +#' state at time 0 should be excluded from the optimisation. Defaults to all +#' state variables except for the first one. +#' @param from_max_mean If this is set to TRUE, and the model has only one +#' observed variable, then data before the time of the maximum observed value +#' (after averaging for each sampling time) are discarded, and this time is +#' subtracted from all remaining time values, so the time of the maximum +#' observed mean value is the new time zero. +#' @param solution_type If set to "eigen", the solution of the system of +#' differential equations is based on the spectral decomposition of the +#' coefficient matrix in cases that this is possible. If set to "deSolve", a +#' numerical ode solver from package \code{\link{deSolve}} is used. If set to +#' "analytical", an analytical solution of the model is used. This is only +#' implemented for simple degradation experiments with only one state +#' variable, i.e. with no metabolites. The default is "auto", which uses +#' "analytical" if possible, otherwise "deSolve" if a compiler is present, +#' and "eigen" if no compiler is present and the model can be expressed using +#' eigenvalues and eigenvectors. This argument is passed on to the helper +#' function \code{\link{mkinpredict}}. +#' @param method.ode The solution method passed via \code{\link{mkinpredict}} +#' to \code{\link{ode}} in case the solution type is "deSolve". The default +#' "lsoda" is performant, but sometimes fails to converge. +#' @param use_compiled If set to \code{FALSE}, no compiled version of the +#' \code{\link{mkinmod}} model is used in the calls to +#' \code{\link{mkinpredict}} even if a compiled version is present. +#' @param control A list of control arguments passed to \code{\link{nlminb}}. +#' @param transform_rates Boolean specifying if kinetic rate constants should +#' be transformed in the model specification used in the fitting for better +#' compliance with the assumption of normal distribution of the estimator. If +#' TRUE, also alpha and beta parameters of the FOMC model are +#' log-transformed, as well as k1 and k2 rate constants for the DFOP and HS +#' models and the break point tb of the HS model. If FALSE, zero is used as +#' a lower bound for the rates in the optimisation. +#' @param transform_fractions Boolean specifying if formation fractions +#' constants should be transformed in the model specification used in the +#' fitting for better compliance with the assumption of normal distribution +#' of the estimator. The default (TRUE) is to do transformations. If TRUE, +#' the g parameter of the DFOP and HS models are also transformed, as they +#' can also be seen as compositional data. The transformation used for these +#' transformations is the \code{\link{ilr}} transformation. +#' @param quiet Suppress printing out the current value of the negative +#' log-likelihood after each improvement? +#' @param atol Absolute error tolerance, passed to \code{\link{ode}}. Default +#' is 1e-8, lower than in \code{\link{lsoda}}. +#' @param rtol Absolute error tolerance, passed to \code{\link{ode}}. Default +#' is 1e-10, much lower than in \code{\link{lsoda}}. +#' @param n.outtimes The length of the dataseries that is produced by the model +#' prediction function \code{\link{mkinpredict}}. This impacts the accuracy +#' of the numerical solver if that is used (see \code{solution_type} +#' argument. The default value is 100. +#' @param error_model If the error model is "const", a constant standard +#' deviation is assumed. +#' +#' If the error model is "obs", each observed variable is assumed to have its +#' own variance. +#' +#' If the error model is "tc" (two-component error model), a two component +#' error model similar to the one described by Rocke and Lorenzato (1995) is +#' used for setting up the likelihood function. Note that this model +#' deviates from the model by Rocke and Lorenzato, as their model implies +#' that the errors follow a lognormal distribution for large values, not a +#' normal distribution as assumed by this method. +#' @param error_model_algorithm If "auto", the selected algorithm depends on +#' the error model. If the error model is "const", unweighted nonlinear +#' least squares fitting ("OLS") is selected. If the error model is "obs", or +#' "tc", the "d_3" algorithm is selected. +#' +#' The algorithm "d_3" will directly minimize the negative log-likelihood and +#' - independently - also use the three step algorithm described below. The +#' fit with the higher likelihood is returned. +#' +#' The algorithm "direct" will directly minimize the negative log-likelihood. +#' +#' The algorithm "twostep" will minimize the negative log-likelihood after an +#' initial unweighted least squares optimisation step. +#' +#' The algorithm "threestep" starts with unweighted least squares, then +#' optimizes only the error model using the degradation model parameters +#' found, and then minimizes the negative log-likelihood with free +#' degradation and error model parameters. +#' +#' The algorithm "fourstep" starts with unweighted least squares, then +#' optimizes only the error model using the degradation model parameters +#' found, then optimizes the degradation model again with fixed error model +#' parameters, and finally minimizes the negative log-likelihood with free +#' degradation and error model parameters. +#' +#' The algorithm "IRLS" (Iteratively Reweighted Least Squares) starts with +#' unweighted least squares, and then iterates optimization of the error +#' model parameters and subsequent optimization of the degradation model +#' using those error model parameters, until the error model parameters +#' converge. +#' @param reweight.tol Tolerance for the convergence criterion calculated from +#' the error model parameters in IRLS fits. +#' @param reweight.max.iter Maximum number of iterations in IRLS fits. +#' @param trace_parms Should a trace of the parameter values be listed? +#' @param \dots Further arguments that will be passed on to +#' \code{\link{deSolve}}. +#' @importFrom stats nlminb aggregate dist +#' @return A list with "mkinfit" in the class attribute. A summary can be +#' obtained by \code{\link{summary.mkinfit}}. +#' @note When using the "IORE" submodel for metabolites, fitting with +#' "transform_rates = TRUE" (the default) often leads to failures of the +#' numerical ODE solver. In this situation it may help to switch off the +#' internal rate transformation. +#' @author Johannes Ranke +#' @seealso Plotting methods \code{\link{plot.mkinfit}} and +#' \code{\link{mkinparplot}}. +#' +#' Comparisons of models fitted to the same data can be made using +#' \code{\link{AIC}} by virtue of the method \code{\link{logLik.mkinfit}}. +#' +#' Fitting of several models to several datasets in a single call to +#' \code{\link{mmkin}}. +#' @source Rocke, David M. und Lorenzato, Stefan (1995) A two-component model +#' for measurement error in analytical chemistry. Technometrics 37(2), 176-184. +#' @examples +#' +#' # Use shorthand notation for parent only degradation +#' fit <- mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE) +#' summary(fit) +#' +#' # One parent compound, one metabolite, both single first order. +#' # Use mkinsub for convenience in model formulation. Pathway to sink included per default. +#' SFO_SFO <- mkinmod( +#' parent = mkinsub("SFO", "m1"), +#' m1 = mkinsub("SFO")) +#' # Fit the model to the FOCUS example dataset D using defaults +#' print(system.time(fit <- mkinfit(SFO_SFO, FOCUS_2006_D, +#' solution_type = "eigen", quiet = TRUE))) +#' coef(fit) +#' endpoints(fit) +#' \dontrun{ +#' # deSolve is slower when no C compiler (gcc) was available during model generation +#' print(system.time(fit.deSolve <- mkinfit(SFO_SFO, FOCUS_2006_D, +#' solution_type = "deSolve"))) +#' coef(fit.deSolve) +#' endpoints(fit.deSolve) +#' } +#' +#' # Use stepwise fitting, using optimised parameters from parent only fit, FOMC +#' \dontrun{ +#' FOMC_SFO <- mkinmod( +#' parent = mkinsub("FOMC", "m1"), +#' m1 = mkinsub("SFO")) +#' # Fit the model to the FOCUS example dataset D using defaults +#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, quiet = TRUE) +#' # Use starting parameters from parent only FOMC fit +#' fit.FOMC = mkinfit("FOMC", FOCUS_2006_D, quiet = TRUE) +#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, quiet = TRUE, +#' parms.ini = fit.FOMC$bparms.ode) +#' +#' # Use stepwise fitting, using optimised parameters from parent only fit, SFORB +#' SFORB_SFO <- mkinmod( +#' parent = list(type = "SFORB", to = "m1", sink = TRUE), +#' m1 = list(type = "SFO")) +#' # Fit the model to the FOCUS example dataset D using defaults +#' fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, quiet = TRUE) +#' fit.SFORB_SFO.deSolve <- mkinfit(SFORB_SFO, FOCUS_2006_D, solution_type = "deSolve", +#' quiet = TRUE) +#' # Use starting parameters from parent only SFORB fit (not really needed in this case) +#' fit.SFORB = mkinfit("SFORB", FOCUS_2006_D, quiet = TRUE) +#' fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, parms.ini = fit.SFORB$bparms.ode, quiet = TRUE) +#' } +#' +#' \dontrun{ +#' # Weighted fits, including IRLS +#' SFO_SFO.ff <- mkinmod(parent = mkinsub("SFO", "m1"), +#' m1 = mkinsub("SFO"), use_of_ff = "max") +#' f.noweight <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, quiet = TRUE) +#' summary(f.noweight) +#' f.obs <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, error_model = "obs", quiet = TRUE) +#' summary(f.obs) +#' f.tc <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, error_model = "tc", quiet = TRUE) +#' summary(f.tc) +#' } +#' +#' +#' @export +mkinfit <- function(mkinmod, observed, + parms.ini = "auto", + state.ini = "auto", + err.ini = "auto", + fixed_parms = NULL, + fixed_initials = names(mkinmod$diffs)[-1], + from_max_mean = FALSE, + solution_type = c("auto", "analytical", "eigen", "deSolve"), + method.ode = "lsoda", + use_compiled = "auto", + control = list(eval.max = 300, iter.max = 200), + transform_rates = TRUE, + transform_fractions = TRUE, + quiet = FALSE, + atol = 1e-8, rtol = 1e-10, n.outtimes = 100, + error_model = c("const", "obs", "tc"), + error_model_algorithm = c("auto", "d_3", "direct", "twostep", "threestep", "fourstep", "IRLS", "OLS"), + reweight.tol = 1e-8, reweight.max.iter = 10, + trace_parms = FALSE, + ...) +{ + # Check mkinmod and generate a model for the variable whith the highest value + # if a suitable string is given + parent_models_available = c("SFO", "FOMC", "DFOP", "HS", "SFORB", "IORE", "logistic") + if (class(mkinmod) != "mkinmod") { + presumed_parent_name = observed[which.max(observed$value), "name"] + if (mkinmod[[1]] %in% parent_models_available) { + speclist <- list(list(type = mkinmod, sink = TRUE)) + names(speclist) <- presumed_parent_name + mkinmod <- mkinmod(speclist = speclist) + } else { + stop("Argument mkinmod must be of class mkinmod or a string containing one of\n ", + paste(parent_models_available, collapse = ", ")) + } + } + + # Get the names of the state variables in the model + mod_vars <- names(mkinmod$diffs) + + # Get the names of observed variables + obs_vars <- names(mkinmod$spec) + + # Subset observed data with names of observed data in the model and remove NA values + observed <- subset(observed, name %in% obs_vars) + observed <- subset(observed, !is.na(value)) + + # Also remove zero values to avoid instabilities (e.g. of the 'tc' error model) + if (any(observed$value == 0)) { + warning("Observations with value of zero were removed from the data") + observed <- subset(observed, value != 0) + } + + # Obtain data for decline from maximum mean value if requested + if (from_max_mean) { + # This is only used for simple decline models + if (length(obs_vars) > 1) + stop("Decline from maximum is only implemented for models with a single observed variable") + + means <- aggregate(value ~ time, data = observed, mean, na.rm=TRUE) + t_of_max <- means[which.max(means$value), "time"] + observed <- subset(observed, time >= t_of_max) + observed$time <- observed$time - t_of_max + } + + # Number observations used for fitting + n_observed <- nrow(observed) + + # Define starting values for parameters where not specified by the user + if (parms.ini[[1]] == "auto") parms.ini = vector() + + # Warn for inital parameter specifications that are not in the model + wrongpar.names <- setdiff(names(parms.ini), mkinmod$parms) + if (length(wrongpar.names) > 0) { + warning("Initial parameter(s) ", paste(wrongpar.names, collapse = ", "), + " not used in the model") + parms.ini <- parms.ini[setdiff(names(parms.ini), wrongpar.names)] + } + + # Warn that the sum of formation fractions may exceed one if they are not + # fitted in the transformed way + if (mkinmod$use_of_ff == "max" & transform_fractions == FALSE) { + warning("The sum of formation fractions may exceed one if you do not use ", + "transform_fractions = TRUE." ) + for (box in mod_vars) { + # Stop if formation fractions are not transformed and we have no sink + if (mkinmod$spec[[box]]$sink == FALSE) { + stop("If formation fractions are not transformed during the fitting, ", + "it is not supported to turn off pathways to sink.\n ", + "Consider turning on the transformation of formation fractions or ", + "setting up a model with use_of_ff = 'min'.\n") + } + } + } + + # Do not allow fixing formation fractions if we are using the ilr transformation, + # this is not supported + if (transform_fractions == TRUE && length(fixed_parms) > 0) { + if (any(grepl("^f_", fixed_parms))) { + stop("Fixing formation fractions is not supported when using the ilr ", + "transformation.") + } + } + + # Set initial parameter values, including a small increment (salt) + # to avoid linear dependencies (singular matrix) in Eigenvalue based solutions + k_salt = 0 + defaultpar.names <- setdiff(mkinmod$parms, names(parms.ini)) + for (parmname in defaultpar.names) { + # Default values for rate constants, depending on the parameterisation + if (grepl("^k", parmname)) { + parms.ini[parmname] = 0.1 + k_salt + k_salt = k_salt + 1e-4 + } + # Default values for rate constants for reversible binding + if (grepl("free_bound$", parmname)) parms.ini[parmname] = 0.1 + if (grepl("bound_free$", parmname)) parms.ini[parmname] = 0.02 + # Default values for IORE exponents + if (grepl("^N", parmname)) parms.ini[parmname] = 1.1 + # Default values for the FOMC, DFOP and HS models + if (parmname == "alpha") parms.ini[parmname] = 1 + if (parmname == "beta") parms.ini[parmname] = 10 + if (parmname == "k1") parms.ini[parmname] = 0.1 + if (parmname == "k2") parms.ini[parmname] = 0.01 + if (parmname == "tb") parms.ini[parmname] = 5 + if (parmname == "g") parms.ini[parmname] = 0.5 + if (parmname == "kmax") parms.ini[parmname] = 0.1 + if (parmname == "k0") parms.ini[parmname] = 0.0001 + if (parmname == "r") parms.ini[parmname] = 0.2 + } + # Default values for formation fractions in case they are present + for (box in mod_vars) { + f_names <- mkinmod$parms[grep(paste0("^f_", box), mkinmod$parms)] + if (length(f_names) > 0) { + # We need to differentiate between default and specified fractions + # and set the unspecified to 1 - sum(specified)/n_unspecified + f_default_names <- intersect(f_names, defaultpar.names) + f_specified_names <- setdiff(f_names, defaultpar.names) + sum_f_specified = sum(parms.ini[f_specified_names]) + if (sum_f_specified > 1) { + stop("Starting values for the formation fractions originating from ", + box, " sum up to more than 1.") + } + if (mkinmod$spec[[box]]$sink) n_unspecified = length(f_default_names) + 1 + else { + n_unspecified = length(f_default_names) + } + parms.ini[f_default_names] <- (1 - sum_f_specified) / n_unspecified + } + } + + # Set default for state.ini if appropriate + parent_name = names(mkinmod$spec)[[1]] + if (state.ini[1] == "auto") { + parent_time_0 = subset(observed, time == 0 & name == parent_name)$value + parent_time_0_mean = mean(parent_time_0, na.rm = TRUE) + if (is.na(parent_time_0_mean)) { + state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)) + } else { + state.ini = c(parent_time_0_mean, rep(0, length(mkinmod$diffs) - 1)) + } + } + + # Name the inital state variable values if they are not named yet + if(is.null(names(state.ini))) names(state.ini) <- mod_vars + + # Transform initial parameter values for fitting + transparms.ini <- transform_odeparms(parms.ini, mkinmod, + transform_rates = transform_rates, + transform_fractions = transform_fractions) + + # Parameters to be optimised: + # Kinetic parameters in parms.ini whose names are not in fixed_parms + parms.fixed <- parms.ini[fixed_parms] + parms.optim <- parms.ini[setdiff(names(parms.ini), fixed_parms)] + + transparms.fixed <- transform_odeparms(parms.fixed, mkinmod, + transform_rates = transform_rates, + transform_fractions = transform_fractions) + transparms.optim <- transform_odeparms(parms.optim, mkinmod, + transform_rates = transform_rates, + transform_fractions = transform_fractions) + + # Inital state variables in state.ini whose names are not in fixed_initials + state.ini.fixed <- state.ini[fixed_initials] + state.ini.optim <- state.ini[setdiff(names(state.ini), fixed_initials)] + + # Preserve names of state variables before renaming initial state variable + # parameters + state.ini.optim.boxnames <- names(state.ini.optim) + state.ini.fixed.boxnames <- names(state.ini.fixed) + if(length(state.ini.optim) > 0) { + names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") + } + if(length(state.ini.fixed) > 0) { + names(state.ini.fixed) <- paste(names(state.ini.fixed), "0", sep="_") + } + + # Decide if the solution of the model can be based on a simple analytical + # formula, the spectral decomposition of the matrix (fundamental system) + # or a numeric ode solver from the deSolve package + # Prefer deSolve over eigen if a compiled model is present and use_compiled + # is not set to FALSE + solution_type = match.arg(solution_type) + if (solution_type == "analytical" && length(mkinmod$spec) > 1) + stop("Analytical solution not implemented for models with metabolites.") + if (solution_type == "eigen" && !is.matrix(mkinmod$coefmat)) + stop("Eigenvalue based solution not possible, coefficient matrix not present.") + if (solution_type == "auto") { + if (length(mkinmod$spec) == 1) { + solution_type = "analytical" + } else { + if (!is.null(mkinmod$cf) & use_compiled[1] != FALSE) { + solution_type = "deSolve" + } else { + if (is.matrix(mkinmod$coefmat)) { + solution_type = "eigen" + if (max(observed$value, na.rm = TRUE) < 0.1) { + stop("The combination of small observed values (all < 0.1) and solution_type = eigen is error-prone") + } + } else { + solution_type = "deSolve" + } + } + } + } + + # Get the error model and the algorithm for fitting + err_mod <- match.arg(error_model) + error_model_algorithm = match.arg(error_model_algorithm) + if (error_model_algorithm == "OLS") { + if (err_mod != "const") stop("OLS is only appropriate for constant variance") + } + if (error_model_algorithm == "auto") { + error_model_algorithm = switch(err_mod, + const = "OLS", obs = "d_3", tc = "d_3") + } + errparm_names <- switch(err_mod, + "const" = "sigma", + "obs" = paste0("sigma_", obs_vars), + "tc" = c("sigma_low", "rsd_high")) + errparm_names_optim <- if (error_model_algorithm == "OLS") NULL else errparm_names + + # Define starting values for the error model + if (err.ini[1] != "auto") { + if (!identical(names(err.ini), errparm_names)) { + stop("Please supply initial values for error model components ", paste(errparm_names, collapse = ", ")) + } else { + errparms = err.ini + } + } else { + if (err_mod == "const") { + errparms = 3 + } + if (err_mod == "obs") { + errparms = rep(3, length(obs_vars)) + } + if (err_mod == "tc") { + errparms <- c(sigma_low = 0.1, rsd_high = 0.1) + } + names(errparms) <- errparm_names + } + if (error_model_algorithm == "OLS") { + errparms_optim <- NULL + } else { + errparms_optim <- errparms + } + + # Define outtimes for model solution. + # Include time points at which observed data are available + outtimes = sort(unique(c(observed$time, seq(min(observed$time), + max(observed$time), + length.out = n.outtimes)))) + + # Define the objective function for optimisation, including (back)transformations + cost_function <- function(P, trans = TRUE, OLS = FALSE, fixed_degparms = FALSE, fixed_errparms = FALSE, update_data = TRUE, ...) + { + assign("calls", calls + 1, inherits = TRUE) # Increase the model solution counter + + # Trace parameter values if requested and if we are actually optimising + if(trace_parms & update_data) cat(P, "\n") + + # Determine local parameter values for the cost estimation + if (is.numeric(fixed_degparms)) { + cost_degparms <- fixed_degparms + cost_errparms <- P + degparms_fixed = TRUE + } else { + degparms_fixed = FALSE + } + + if (is.numeric(fixed_errparms)) { + cost_degparms <- P + cost_errparms <- fixed_errparms + errparms_fixed = TRUE + } else { + errparms_fixed = FALSE + } + + if (OLS) { + cost_degparms <- P + cost_errparms <- numeric(0) + } + + if (!OLS & !degparms_fixed & !errparms_fixed) { + cost_degparms <- P[1:(length(P) - length(errparms))] + cost_errparms <- P[(length(cost_degparms) + 1):length(P)] + } + + # Initial states for t0 + if(length(state.ini.optim) > 0) { + odeini <- c(cost_degparms[1:length(state.ini.optim)], state.ini.fixed) + names(odeini) <- c(state.ini.optim.boxnames, state.ini.fixed.boxnames) + } else { + odeini <- state.ini.fixed + names(odeini) <- state.ini.fixed.boxnames + } + + odeparms.optim <- cost_degparms[(length(state.ini.optim) + 1):length(cost_degparms)] + + if (trans == TRUE) { + odeparms <- c(odeparms.optim, transparms.fixed) + parms <- backtransform_odeparms(odeparms, mkinmod, + transform_rates = transform_rates, + transform_fractions = transform_fractions) + } else { + parms <- c(odeparms.optim, parms.fixed) + } + + # Solve the system with current parameter values + out <- mkinpredict(mkinmod, parms, + odeini, outtimes, + solution_type = solution_type, + use_compiled = use_compiled, + method.ode = method.ode, + atol = atol, rtol = rtol, ...) + + out_long <- mkin_wide_to_long(out, time = "time") + + if (err_mod == "const") { + observed$std <- if (OLS) NA else cost_errparms["sigma"] + } + if (err_mod == "obs") { + std_names <- paste0("sigma_", observed$name) + observed$std <- cost_errparms[std_names] + } + if (err_mod == "tc") { + tmp <- merge(observed, out_long, by = c("time", "name")) + tmp$name <- ordered(tmp$name, levels = obs_vars) + tmp <- tmp[order(tmp$name, tmp$time), ] + observed$std <- sqrt(cost_errparms["sigma_low"]^2 + tmp$value.y^2 * cost_errparms["rsd_high"]^2) + } + + cost_data <- merge(observed[c("name", "time", "value", "std")], out_long, + by = c("name", "time"), suffixes = c(".observed", ".predicted")) + + if (OLS) { + # Cost is the sum of squared residuals + cost <- with(cost_data, sum((value.observed - value.predicted)^2)) + } else { + # Cost is the negative log-likelihood + cost <- - with(cost_data, + sum(dnorm(x = value.observed, mean = value.predicted, sd = std, log = TRUE))) + } + + # We update the current cost and data during the optimisation, not + # during hessian calculations + if (update_data) { + + assign("out_predicted", out_long, inherits = TRUE) + assign("current_data", cost_data, inherits = TRUE) + + if (cost < cost.current) { + assign("cost.current", cost, inherits = TRUE) + if (!quiet) cat(ifelse(OLS, "Sum of squared residuals", "Negative log-likelihood"), + " at call ", calls, ": ", cost.current, "\n", sep = "") + } + } + return(cost) + } + + names_optim <- c(names(state.ini.optim), + names(transparms.optim), + errparm_names_optim) + n_optim <- length(names_optim) + + # Define lower and upper bounds other than -Inf and Inf for parameters + # for which no internal transformation is requested in the call to mkinfit + # and for optimised error model parameters + lower <- rep(-Inf, n_optim) + upper <- rep(Inf, n_optim) + names(lower) <- names(upper) <- names_optim + + # IORE exponents are not transformed, but need a lower bound + index_N <- grep("^N", names(lower)) + lower[index_N] <- 0 + + if (!transform_rates) { + index_k <- grep("^k_", names(lower)) + lower[index_k] <- 0 + index_k__iore <- grep("^k__iore_", names(lower)) + lower[index_k__iore] <- 0 + other_rate_parms <- intersect(c("alpha", "beta", "k1", "k2", "tb", "r"), names(lower)) + lower[other_rate_parms] <- 0 + } + + if (!transform_fractions) { + index_f <- grep("^f_", names(upper)) + lower[index_f] <- 0 + upper[index_f] <- 1 + other_fraction_parms <- intersect(c("g"), names(upper)) + lower[other_fraction_parms] <- 0 + upper[other_fraction_parms] <- 1 + } + + if (err_mod == "const") { + if (error_model_algorithm != "OLS") { + lower["sigma"] <- 0 + } + } + if (err_mod == "obs") { + index_sigma <- grep("^sigma_", names(lower)) + lower[index_sigma] <- 0 + } + if (err_mod == "tc") { + lower["sigma_low"] <- 0 + lower["rsd_high"] <- 0 + } + + # Counter for cost function evaluations + calls = 0 + cost.current <- Inf + out_predicted <- NA + current_data <- NA + + # Show parameter names if tracing is requested + if(trace_parms) cat(names_optim, "\n") + + # browser() + + # Do the fit and take the time until the hessians are calculated + fit_time <- system.time({ + degparms <- c(state.ini.optim, transparms.optim) + n_degparms <- length(degparms) + degparms_index <- seq(1, n_degparms) + errparms_index <- seq(n_degparms + 1, length.out = length(errparms)) + + if (error_model_algorithm == "d_3") { + if (!quiet) message("Directly optimising the complete model") + parms.start <- c(degparms, errparms) + fit_direct <- nlminb(parms.start, cost_function, + lower = lower[names(parms.start)], + upper = upper[names(parms.start)], + control = control, ...) + fit_direct$logLik <- - cost.current + if (error_model_algorithm == "direct") { + degparms <- fit_direct$par[degparms_index] + errparms <- fit_direct$par[errparms_index] + } else { + cost.current <- Inf # reset to avoid conflict with the OLS step + } + } + if (error_model_algorithm != "direct") { + if (!quiet) message("Ordinary least squares optimisation") + fit <- nlminb(degparms, cost_function, control = control, + lower = lower[names(degparms)], + upper = upper[names(degparms)], OLS = TRUE, ...) + degparms <- fit$par + + # Get the maximum likelihood estimate for sigma at the optimum parameter values + current_data$residual <- current_data$value.observed - current_data$value.predicted + sigma_mle <- sqrt(sum(current_data$residual^2)/nrow(current_data)) + + # Use that estimate for the constant variance, or as first guess if err_mod = "obs" + if (err_mod != "tc") { + errparms[names(errparms)] <- sigma_mle + } + fit$par <- c(fit$par, errparms) + + cost.current <- cost_function(c(degparms, errparms), OLS = FALSE) + fit$logLik <- - cost.current + } + if (error_model_algorithm %in% c("threestep", "fourstep", "d_3")) { + if (!quiet) message("Optimising the error model") + fit <- nlminb(errparms, cost_function, control = control, + lower = lower[names(errparms)], + upper = upper[names(errparms)], + fixed_degparms = degparms, ...) + errparms <- fit$par + } + if (error_model_algorithm == "fourstep") { + if (!quiet) message("Optimising the degradation model") + fit <- nlminb(degparms, cost_function, control = control, + lower = lower[names(degparms)], + upper = upper[names(degparms)], + fixed_errparms = errparms, ...) + degparms <- fit$par + } + if (error_model_algorithm %in% + c("direct", "twostep", "threestep", "fourstep", "d_3")) { + if (!quiet) message("Optimising the complete model") + parms.start <- c(degparms, errparms) + fit <- nlminb(parms.start, cost_function, + lower = lower[names(parms.start)], + upper = upper[names(parms.start)], + control = control, ...) + degparms <- fit$par[degparms_index] + errparms <- fit$par[errparms_index] + fit$logLik <- - cost.current + + if (error_model_algorithm == "d_3") { + d_3_messages = c( + same = "Direct fitting and three-step fitting yield approximately the same likelihood", + threestep = "Three-step fitting yielded a higher likelihood than direct fitting", + direct = "Direct fitting yielded a higher likelihood than three-step fitting") + rel_diff <- abs((fit_direct$logLik - fit$logLik))/-mean(c(fit_direct$logLik, fit$logLik)) + if (rel_diff < 0.0001) { + if (!quiet) message(d_3_messages["same"]) + fit$d_3_message <- d_3_messages["same"] + } else { + if (fit$logLik > fit_direct$logLik) { + if (!quiet) message(d_3_messages["threestep"]) + fit$d_3_message <- d_3_messages["threestep"] + } else { + if (!quiet) message(d_3_messages["direct"]) + fit <- fit_direct + fit$d_3_message <- d_3_messages["direct"] + } + } + } + } + if (err_mod != "const" & error_model_algorithm == "IRLS") { + reweight.diff <- 1 + n.iter <- 0 + errparms_last <- errparms + + while (reweight.diff > reweight.tol & + n.iter < reweight.max.iter) { + + if (!quiet) message("Optimising the error model") + fit <- nlminb(errparms, cost_function, control = control, + lower = lower[names(errparms)], + upper = upper[names(errparms)], + fixed_degparms = degparms, ...) + errparms <- fit$par + + if (!quiet) message("Optimising the degradation model") + fit <- nlminb(degparms, cost_function, control = control, + lower = lower[names(degparms)], + upper = upper[names(degparms)], + fixed_errparms = errparms, ...) + degparms <- fit$par + + reweight.diff <- dist(rbind(errparms, errparms_last)) + errparms_last <- errparms + + fit$par <- c(fit$par, errparms) + cost.current <- cost_function(c(degparms, errparms), OLS = FALSE) + fit$logLik <- - cost.current + } + } + + fit$hessian <- try(numDeriv::hessian(cost_function, c(degparms, errparms), OLS = FALSE, + update_data = FALSE), silent = TRUE) + + # Backtransform parameters + bparms.optim = backtransform_odeparms(fit$par, mkinmod, + transform_rates = transform_rates, + transform_fractions = transform_fractions) + bparms.fixed = c(state.ini.fixed, parms.fixed) + bparms.all = c(bparms.optim, parms.fixed) + + fit$hessian_notrans <- try(numDeriv::hessian(cost_function, c(bparms.all, errparms), + OLS = FALSE, trans = FALSE, update_data = FALSE), silent = TRUE) + }) + + fit$error_model_algorithm <- error_model_algorithm + + if (fit$convergence != 0) { + fit$warning = paste0("Optimisation did not converge:\n", fit$message) + warning(fit$warning) + } else { + if(!quiet) message("Optimisation successfully terminated.\n") + } + + # We need to return some more data for summary and plotting + fit$solution_type <- solution_type + fit$transform_rates <- transform_rates + fit$transform_fractions <- transform_fractions + fit$reweight.tol <- reweight.tol + fit$reweight.max.iter <- reweight.max.iter + fit$control <- control + fit$calls <- calls + fit$time <- fit_time + + # We also need the model for summary and plotting + fit$mkinmod <- mkinmod + + # We need data and predictions for summary and plotting + fit$observed <- observed + fit$obs_vars <- obs_vars + fit$predicted <- out_predicted + + # Residual sum of squares as a function of the fitted parameters + fit$rss <- function(P) cost_function(P, OLS = TRUE, update_data = FALSE) + + # Log-likelihood with possibility to fix degparms or errparms + fit$ll <- function(P, fixed_degparms = FALSE, fixed_errparms = FALSE) { + - cost_function(P, fixed_degparms = fixed_degparms, + fixed_errparms = fixed_errparms, OLS = FALSE, update_data = FALSE) + } + + # Collect initial parameter values in three dataframes + fit$start <- data.frame(value = c(state.ini.optim, + parms.optim, errparms_optim)) + fit$start$type = c(rep("state", length(state.ini.optim)), + rep("deparm", length(parms.optim)), + rep("error", length(errparms_optim))) + + fit$start_transformed = data.frame( + value = c(state.ini.optim, transparms.optim, errparms_optim), + lower = lower, + upper = upper) + + fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) + fit$fixed$type = c(rep("state", length(state.ini.fixed)), + rep("deparm", length(parms.fixed))) + + # Sort observed, predicted and residuals + current_data$name <- ordered(current_data$name, levels = obs_vars) + + ordered_data <- current_data[order(current_data$name, current_data$time), ] + + fit$data <- data.frame(time = ordered_data$time, + variable = ordered_data$name, + observed = ordered_data$value.observed, + predicted = ordered_data$value.predicted) + + fit$data$residual <- fit$data$observed - fit$data$predicted + + fit$atol <- atol + fit$rtol <- rtol + fit$err_mod <- err_mod + + # Return different sets of backtransformed parameters for summary and plotting + fit$bparms.optim <- bparms.optim + fit$bparms.fixed <- bparms.fixed + + # Return ode and state parameters for further fitting + fit$bparms.ode <- bparms.all[mkinmod$parms] + fit$bparms.state <- c(bparms.all[setdiff(names(bparms.all), names(fit$bparms.ode))], + state.ini.fixed) + names(fit$bparms.state) <- gsub("_0$", "", names(fit$bparms.state)) + + fit$errparms <- errparms + fit$df.residual <- n_observed - length(c(degparms, errparms)) + + fit$date <- date() + fit$version <- as.character(utils::packageVersion("mkin")) + fit$Rversion <- paste(R.version$major, R.version$minor, sep=".") + + class(fit) <- c("mkinfit", "modFit") + return(fit) +} diff --git a/R/mkinmod.R b/R/mkinmod.R index 26148f18..a1ae0021 100644 --- a/R/mkinmod.R +++ b/R/mkinmod.R @@ -1,383 +1,483 @@ -# Copyright (C) 2010-2015,2019 Johannes Ranke {{{ -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see }}} - -mkinmod <- function(..., use_of_ff = "min", speclist = NULL, quiet = FALSE, verbose = FALSE) -{ - if (is.null(speclist)) spec <- list(...) - else spec <- speclist - obs_vars <- names(spec) - - # Check if any of the names of the observed variables contains any other - for (obs_var in obs_vars) { - if (length(grep(obs_var, obs_vars)) > 1) stop("Sorry, variable names can not contain each other") - if (grepl("_to_", obs_var)) stop("Sorry, names of observed variables can not contain _to_") - if (obs_var == "sink") stop("Naming a compound 'sink' is not supported") - } - - if (!use_of_ff %in% c("min", "max")) - stop("The use of formation fractions 'use_of_ff' can only be 'min' or 'max'") - - # The returned model will be a list of character vectors, containing {{{ - # differential equations (if supported), parameter names and a mapping from - # model variables to observed variables. If possible, a matrix representation - # of the differential equations is included - # Compiling the functions from the C code generated below only works if the - # implicit assumption about differential equations specified below - # is satisfied - parms <- vector() - # }}} - - # Do not return a coefficient matrix mat when FOMC, IORE, DFOP, HS or logistic is used for the parent {{{ - if(spec[[1]]$type %in% c("FOMC", "IORE", "DFOP", "HS", "logistic")) { - mat = FALSE - } else mat = TRUE - #}}} - - # Establish a list of differential equations as well as a map from observed {{{ - # compartments to differential equations - diffs <- vector() - map <- list() - for (varname in obs_vars) - { - # Check the type component of the compartment specification {{{ - if(is.null(spec[[varname]]$type)) stop( - "Every part of the model specification must be a list containing a type component") - if(!spec[[varname]]$type %in% c("SFO", "FOMC", "IORE", "DFOP", "HS", "SFORB", "logistic")) stop( - "Available types are SFO, FOMC, IORE, DFOP, HS, SFORB and logistic only") - if(spec[[varname]]$type %in% c("FOMC", "DFOP", "HS", "logistic") & match(varname, obs_vars) != 1) { - stop(paste("Types FOMC, DFOP, HS and logistic are only implemented for the first compartment,", - "which is assumed to be the source compartment")) - } - #}}} - # New (sub)compartments (boxes) needed for the model type {{{ - new_boxes <- switch(spec[[varname]]$type, - SFO = varname, - FOMC = varname, - IORE = varname, - DFOP = varname, - HS = varname, - logistic = varname, - SFORB = paste(varname, c("free", "bound"), sep = "_") - ) - map[[varname]] <- new_boxes - names(map[[varname]]) <- rep(spec[[varname]]$type, length(new_boxes)) #}}} - # Start a new differential equation for each new box {{{ - new_diffs <- paste("d_", new_boxes, " =", sep = "") - names(new_diffs) <- new_boxes - diffs <- c(diffs, new_diffs) #}}} - } #}}} - - # Create content of differential equations and build parameter list {{{ - for (varname in obs_vars) - { - # Get the name of the box(es) we are working on for the decline term(s) - box_1 = map[[varname]][[1]] # This is the only box unless type is SFORB - # Turn on sink if this is not explicitly excluded by the user by - # specifying sink=FALSE - if(is.null(spec[[varname]]$sink)) spec[[varname]]$sink <- TRUE - if(spec[[varname]]$type %in% c("SFO", "IORE", "SFORB")) { # {{{ Add decline term - if (use_of_ff == "min") { # Minimum use of formation fractions - if(spec[[varname]]$type == "IORE" && length(spec[[varname]]$to) > 0) { - stop("Transformation reactions from compounds modelled with IORE\n", - "are only supported with formation fractions (use_of_ff = 'max')") - } - if(spec[[varname]]$sink) { - # If sink is required, add first-order/IORE sink term - k_compound_sink <- paste("k", box_1, "sink", sep = "_") - if(spec[[varname]]$type == "IORE") { - k_compound_sink <- paste("k__iore", box_1, "sink", sep = "_") - } - parms <- c(parms, k_compound_sink) - decline_term <- paste(k_compound_sink, "*", box_1) - if(spec[[varname]]$type == "IORE") { - N <- paste("N", box_1, sep = "_") - parms <- c(parms, N) - decline_term <- paste0(decline_term, "^", N) - } - } else { # otherwise no decline term needed here - decline_term = "0" - } - } else { - k_compound <- paste("k", box_1, sep = "_") - if(spec[[varname]]$type == "IORE") { - k_compound <- paste("k__iore", box_1, sep = "_") - } - parms <- c(parms, k_compound) - decline_term <- paste(k_compound, "*", box_1) - if(spec[[varname]]$type == "IORE") { - N <- paste("N", box_1, sep = "_") - parms <- c(parms, N) - decline_term <- paste0(decline_term, "^", N) - } - } - } #}}} - if(spec[[varname]]$type == "FOMC") { # {{{ Add FOMC decline term - # From p. 53 of the FOCUS kinetics report, without the power function so it works in C - decline_term <- paste("(alpha/beta) * 1/((time/beta) + 1) *", box_1) - parms <- c(parms, "alpha", "beta") - } #}}} - if(spec[[varname]]$type == "DFOP") { # {{{ Add DFOP decline term - # From p. 57 of the FOCUS kinetics report - decline_term <- paste("((k1 * g * exp(-k1 * time) + k2 * (1 - g) * exp(-k2 * time)) / (g * exp(-k1 * time) + (1 - g) * exp(-k2 * time))) *", box_1) - parms <- c(parms, "k1", "k2", "g") - } #}}} - HS_decline <- "ifelse(time <= tb, k1, k2)" # Used below for automatic translation to C - if(spec[[varname]]$type == "HS") { # {{{ Add HS decline term - # From p. 55 of the FOCUS kinetics report - decline_term <- paste(HS_decline, "*", box_1) - parms <- c(parms, "k1", "k2", "tb") - } #}}} - if(spec[[varname]]$type == "logistic") { # {{{ Add logistic decline term - # From p. 67 of the FOCUS kinetics report (2014) - decline_term <- paste("(k0 * kmax)/(k0 + (kmax - k0) * exp(-r * time)) *", box_1) - parms <- c(parms, "kmax", "k0", "r") - } #}}} - # Add origin decline term to box 1 (usually the only box, unless type is SFORB)#{{{ - diffs[[box_1]] <- paste(diffs[[box_1]], "-", decline_term)#}}} - if(spec[[varname]]$type == "SFORB") { # {{{ Add SFORB reversible binding terms - box_2 = map[[varname]][[2]] - if (use_of_ff == "min") { # Minimum use of formation fractions - k_free_bound <- paste("k", varname, "free", "bound", sep = "_") - k_bound_free <- paste("k", varname, "bound", "free", sep = "_") - parms <- c(parms, k_free_bound, k_bound_free) - reversible_binding_term_1 <- paste("-", k_free_bound, "*", box_1, "+", - k_bound_free, "*", box_2) - reversible_binding_term_2 <- paste("+", k_free_bound, "*", box_1, "-", - k_bound_free, "*", box_2) - } else { # Use formation fractions also for the free compartment - stop("The maximum use of formation fractions is not supported for SFORB models") - # The problems were: Calculation of dissipation times did not work in this case - # and the coefficient matrix is not generated correctly by the code present - # in this file in this case - #f_free_bound <- paste("f", varname, "free", "bound", sep = "_") - #k_bound_free <- paste("k", varname, "bound", "free", sep = "_") - #parms <- c(parms, f_free_bound, k_bound_free) - #reversible_binding_term_1 <- paste("+", k_bound_free, "*", box_2) - #reversible_binding_term_2 <- paste("+", f_free_bound, "*", k_compound, "*", box_1, "-", - # k_bound_free, "*", box_2) - } - diffs[[box_1]] <- paste(diffs[[box_1]], reversible_binding_term_1) - diffs[[box_2]] <- paste(diffs[[box_2]], reversible_binding_term_2) - } #}}} - - # Transfer between compartments#{{{ - to <- spec[[varname]]$to - if(!is.null(to)) { - # Name of box from which transfer takes place - origin_box <- box_1 - - # Number of targets - n_targets = length(to) - - # Add transfer terms to listed compartments - for (target in to) { - if (!target %in% obs_vars) stop("You did not specify a submodel for target variable ", target) - target_box <- switch(spec[[target]]$type, - SFO = target, - IORE = target, - SFORB = paste(target, "free", sep = "_")) - if (use_of_ff == "min" && spec[[varname]]$type %in% c("SFO", "SFORB")) - { - k_from_to <- paste("k", origin_box, target_box, sep = "_") - parms <- c(parms, k_from_to) - diffs[[origin_box]] <- paste(diffs[[origin_box]], "-", - k_from_to, "*", origin_box) - diffs[[target_box]] <- paste(diffs[[target_box]], "+", - k_from_to, "*", origin_box) - } else { - # Do not introduce a formation fraction if this is the only target - if (spec[[origin_box]]$sink == FALSE && n_targets == 1) { - diffs[[target_box]] <- paste(diffs[[target_box]], "+", - decline_term) - } else { - fraction_to_target = paste("f", origin_box, "to", target, sep = "_") - parms <- c(parms, fraction_to_target) - diffs[[target_box]] <- paste(diffs[[target_box]], "+", - fraction_to_target, "*", decline_term) - } - } - } - } #}}} - } #}}} - - model <- list(diffs = diffs, parms = parms, map = map, spec = spec, use_of_ff = use_of_ff) - - # Create coefficient matrix if appropriate#{{{ - if (mat) { - boxes <- names(diffs) - n <- length(boxes) - m <- matrix(nrow=n, ncol=n, dimnames=list(boxes, boxes)) - - if (use_of_ff == "min") { # {{{ Minimum use of formation fractions - for (from in boxes) { - for (to in boxes) { - if (from == to) { # diagonal elements - k.candidate = paste("k", from, c(boxes, "sink"), sep = "_") - k.candidate = sub("free.*bound", "free_bound", k.candidate) - k.candidate = sub("bound.*free", "bound_free", k.candidate) - k.effective = intersect(model$parms, k.candidate) - m[from,to] = ifelse(length(k.effective) > 0, - paste("-", k.effective, collapse = " "), "0") - - } else { # off-diagonal elements - k.candidate = paste("k", from, to, sep = "_") - if (sub("_free$", "", from) == sub("_bound$", "", to)) { - k.candidate = paste("k", sub("_free$", "_free_bound", from), sep = "_") - } - if (sub("_bound$", "", from) == sub("_free$", "", to)) { - k.candidate = paste("k", sub("_bound$", "_bound_free", from), sep = "_") - } - k.effective = intersect(model$parms, k.candidate) - m[to, from] = ifelse(length(k.effective) > 0, - k.effective, "0") - } - } - } # }}} - } else { # {{{ Use formation fractions where possible - for (from in boxes) { - for (to in boxes) { - if (from == to) { # diagonal elements - k.candidate = paste("k", from, sep = "_") - m[from,to] = ifelse(k.candidate %in% model$parms, - paste("-", k.candidate), "0") - if(grepl("_free", from)) { # add transfer to bound compartment for SFORB - m[from,to] = paste(m[from,to], "-", paste("k", from, "bound", sep = "_")) - } - if(grepl("_bound", from)) { # add backtransfer to free compartment for SFORB - m[from,to] = paste("- k", from, "free", sep = "_") - } - m[from,to] = m[from,to] - } else { # off-diagonal elements - f.candidate = paste("f", from, "to", to, sep = "_") - k.candidate = paste("k", from, to, sep = "_") - # SFORB with maximum use of formation fractions not implemented, see above - m[to, from] = ifelse(f.candidate %in% model$parms, - paste(f.candidate, " * k_", from, sep = ""), - ifelse(k.candidate %in% model$parms, k.candidate, "0")) - # Special case: singular pathway and no sink - if (spec[[from]]$sink == FALSE && length(spec[[from]]$to) == 1 && to %in% spec[[from]]$to) { - m[to, from] = paste("k", from, sep = "_") - } - } - } - } - } # }}} - model$coefmat <- m - }#}}} - - # Try to create a function compiled from C code if more than one observed {{{ - # variable and gcc is available - if (length(obs_vars) > 1) { - if (Sys.which("gcc") != "") { - - # Translate the R code for the derivatives to C code - diffs.C <- paste(diffs, collapse = ";\n") - diffs.C <- paste0(diffs.C, ";") - - # HS - diffs.C <- gsub(HS_decline, "(time <= tb ? k1 : k2)", diffs.C, fixed = TRUE) - - for (i in seq_along(diffs)) { - state_var <- names(diffs)[i] - - # IORE - if (state_var %in% obs_vars) { - if (spec[[state_var]]$type == "IORE") { - diffs.C <- gsub(paste0(state_var, "^N_", state_var), - paste0("pow(y[", i - 1, "], N_", state_var, ")"), - diffs.C, fixed = TRUE) - } - } - - # Replace d_... terms by f[i-1] - # First line - pattern <- paste0("^d_", state_var) - replacement <- paste0("\nf[", i - 1, "]") - diffs.C <- gsub(pattern, replacement, diffs.C) - # Other lines - pattern <- paste0("\\nd_", state_var) - replacement <- paste0("\nf[", i - 1, "]") - diffs.C <- gsub(pattern, replacement, diffs.C) - - # Replace names of observed variables by y[i], - # making the implicit assumption that the observed variables only occur after "* " - pattern <- paste0("\\* ", state_var) - replacement <- paste0("* y[", i - 1, "]") - diffs.C <- gsub(pattern, replacement, diffs.C) - } - - derivs_sig <- signature(n = "integer", t = "numeric", y = "numeric", - f = "numeric", rpar = "numeric", ipar = "integer") - - # Declare the time variable in the body of the function if it is used - derivs_code <- if (spec[[1]]$type %in% c("FOMC", "DFOP", "HS")) { - paste0("double time = *t;\n", diffs.C) - } else { - diffs.C - } - - # Define the function initializing the parameters - npar <- length(parms) - initpar_code <- paste0( - "static double parms [", npar, "];\n", - paste0("#define ", parms, " parms[", 0:(npar - 1), "]\n", collapse = ""), - "\n", - "void initpar(void (* odeparms)(int *, double *)) {\n", - " int N = ", npar, ";\n", - " odeparms(&N, parms);\n", - "}\n\n") - - # Try to build a shared library - cf <- try(cfunction(list(func = derivs_sig), derivs_code, - otherdefs = initpar_code, - verbose = verbose, - convention = ".C", language = "C"), - silent = TRUE) - - if (!inherits(cf, "try-error")) { - if (!quiet) message("Successfully compiled differential equation model from auto-generated C code.") - model$cf <- cf - } - } - } - # }}} - - class(model) <- "mkinmod" - return(model) -} - -print.mkinmod <- function(x, ...) { - cat(" model generated with\n") - cat("Use of formation fractions $use_of_ff:", x$use_of_ff, "\n") - cat("Specification $spec:\n") - for (obs in names(x$spec)) { - cat("$", obs, "\n", sep = "") - spl <- x$spec[[obs]] - cat("$type:", spl$type) - if (!is.null(spl$to) && length(spl$to)) cat("; $to: ", paste(spl$to, collapse = ", "), sep = "") - cat("; $sink: ", spl$sink, sep = "") - if (!is.null(spl$full_name)) if (!is.na(spl$full_name)) cat("; $full_name:", spl$full_name) - cat("\n") - } - if (is.matrix(x$coefmat)) cat("Coefficient matrix $coefmat available\n") - if (!is.null(x$cf)) cat("Compiled model $cf available\n") - cat("Differential equations:\n") - nice_diffs <- gsub("^(d.*) =", "\\1/dt =", x[["diffs"]]) - writeLines(strwrap(nice_diffs, exdent = 11)) -} -# vim: set foldmethod=marker ts=2 sw=2 expandtab: +#' Function to set up a kinetic model with one or more state variables +#' +#' The function usually takes several expressions, each assigning a compound +#' name to a list, specifying the kinetic model type and reaction or transfer +#' to other observed compartments. Instead of specifying several expressions, a +#' list of lists can be given in the speclist argument. +#' +#' For the definition of model types and their parameters, the equations given +#' in the FOCUS and NAFTA guidance documents are used. +#' +#' @param ... For each observed variable, a list has to be specified as an +#' argument, containing at least a component \code{type}, specifying the type +#' of kinetics to use for the variable. Currently, single first order +#' kinetics "SFO", indeterminate order rate equation kinetics "IORE", or +#' single first order with reversible binding "SFORB" are implemented for all +#' variables, while "FOMC", "DFOP" and "HS" can additionally be chosen for +#' the first variable which is assumed to be the source compartment. +#' Additionally, each component of the list can include a character vector +#' \code{to}, specifying names of variables to which a transfer is to be +#' assumed in the model. If the argument \code{use_of_ff} is set to "min" +#' (default) and the model for the compartment is "SFO" or "SFORB", an +#' additional component of the list can be "sink=FALSE" effectively fixing +#' the flux to sink to zero. +#' @param use_of_ff Specification of the use of formation fractions in the +#' model equations and, if applicable, the coefficient matrix. If "min", a +#' minimum use of formation fractions is made in order to avoid fitting the +#' product of formation fractions and rate constants. If "max", formation +#' fractions are always used. +#' @param speclist The specification of the observed variables and their +#' submodel types and pathways can be given as a single list using this +#' argument. Default is NULL. +#' @param quiet Should messages be suppressed? +#' @param verbose If \code{TRUE}, passed to \code{\link{cfunction}} if +#' applicable to give detailed information about the C function being built. +#' @importFrom methods signature +#' @importFrom inline cfunction +#' @return A list of class \code{mkinmod} for use with \code{\link{mkinfit}}, +#' containing, among others, +#' \item{diffs}{ +#' A vector of string representations of differential equations, one for +#' each modelling variable. +#' } +#' \item{map}{ +#' A list containing named character vectors for each observed variable, +#' specifying the modelling variables by which it is represented. +#' } +#' \item{use_of_ff}{ +#' The content of \code{use_of_ff} is passed on in this list component. +#' } +#' \item{coefmat}{ +#' The coefficient matrix, if the system of differential equations can be +#' represented by one. +#' } +#' \item{ll}{ +#' The likelihood function, taking the parameter vector as the first argument. +#' } +#' @note The IORE submodel is not well tested for metabolites. When using this +#' model for metabolites, you may want to read the second note in the help +#' page to \code{\link{mkinfit}}. +#' @author Johannes Ranke +#' @references FOCUS (2006) \dQuote{Guidance Document on Estimating Persistence +#' and Degradation Kinetics from Environmental Fate Studies on Pesticides in +#' EU Registration} Report of the FOCUS Work Group on Degradation Kinetics, +#' EC Document Reference Sanco/10058/2005 version 2.0, 434 pp, +#' \url{http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics} +#' +#' NAFTA Technical Working Group on Pesticides (not dated) Guidance for +#' Evaluating and Calculating Degradation Kinetics in Environmental Media +#' @examples +#' +#' # Specify the SFO model (this is not needed any more, as we can now mkinfit("SFO", ...) +#' SFO <- mkinmod(parent = list(type = "SFO")) +#' +#' # One parent compound, one metabolite, both single first order +#' SFO_SFO <- mkinmod( +#' parent = mkinsub("SFO", "m1"), +#' m1 = mkinsub("SFO")) +#' +#' \dontrun{ +#' # The above model used to be specified like this, before the advent of mkinsub() +#' SFO_SFO <- mkinmod( +#' parent = list(type = "SFO", to = "m1"), +#' m1 = list(type = "SFO")) +#' +#' # Show details of creating the C function +#' SFO_SFO <- mkinmod( +#' parent = mkinsub("SFO", "m1"), +#' m1 = mkinsub("SFO"), verbose = TRUE) +#' +#' # If we have several parallel metabolites +#' # (compare tests/testthat/test_synthetic_data_for_UBA_2014.R) +#' m_synth_DFOP_par <- mkinmod(parent = mkinsub("DFOP", c("M1", "M2")), +#' M1 = mkinsub("SFO"), +#' M2 = mkinsub("SFO"), +#' use_of_ff = "max", quiet = TRUE) +#' +#' fit_DFOP_par_c <- mkinfit(m_synth_DFOP_par, +#' synthetic_data_for_UBA_2014[[12]]$data, +#' quiet = TRUE) +#' } +#' +#' @export mkinmod +mkinmod <- function(..., use_of_ff = "min", speclist = NULL, quiet = FALSE, verbose = FALSE) +{ + if (is.null(speclist)) spec <- list(...) + else spec <- speclist + obs_vars <- names(spec) + + # Check if any of the names of the observed variables contains any other + for (obs_var in obs_vars) { + if (length(grep(obs_var, obs_vars)) > 1) stop("Sorry, variable names can not contain each other") + if (grepl("_to_", obs_var)) stop("Sorry, names of observed variables can not contain _to_") + if (obs_var == "sink") stop("Naming a compound 'sink' is not supported") + } + + if (!use_of_ff %in% c("min", "max")) + stop("The use of formation fractions 'use_of_ff' can only be 'min' or 'max'") + + # The returned model will be a list of character vectors, containing {{{ + # differential equations (if supported), parameter names and a mapping from + # model variables to observed variables. If possible, a matrix representation + # of the differential equations is included + # Compiling the functions from the C code generated below only works if the + # implicit assumption about differential equations specified below + # is satisfied + parms <- vector() + # }}} + + # Do not return a coefficient matrix mat when FOMC, IORE, DFOP, HS or logistic is used for the parent {{{ + if(spec[[1]]$type %in% c("FOMC", "IORE", "DFOP", "HS", "logistic")) { + mat = FALSE + } else mat = TRUE + #}}} + + # Establish a list of differential equations as well as a map from observed {{{ + # compartments to differential equations + diffs <- vector() + map <- list() + for (varname in obs_vars) + { + # Check the type component of the compartment specification {{{ + if(is.null(spec[[varname]]$type)) stop( + "Every part of the model specification must be a list containing a type component") + if(!spec[[varname]]$type %in% c("SFO", "FOMC", "IORE", "DFOP", "HS", "SFORB", "logistic")) stop( + "Available types are SFO, FOMC, IORE, DFOP, HS, SFORB and logistic only") + if(spec[[varname]]$type %in% c("FOMC", "DFOP", "HS", "logistic") & match(varname, obs_vars) != 1) { + stop(paste("Types FOMC, DFOP, HS and logistic are only implemented for the first compartment,", + "which is assumed to be the source compartment")) + } + #}}} + # New (sub)compartments (boxes) needed for the model type {{{ + new_boxes <- switch(spec[[varname]]$type, + SFO = varname, + FOMC = varname, + IORE = varname, + DFOP = varname, + HS = varname, + logistic = varname, + SFORB = paste(varname, c("free", "bound"), sep = "_") + ) + map[[varname]] <- new_boxes + names(map[[varname]]) <- rep(spec[[varname]]$type, length(new_boxes)) #}}} + # Start a new differential equation for each new box {{{ + new_diffs <- paste("d_", new_boxes, " =", sep = "") + names(new_diffs) <- new_boxes + diffs <- c(diffs, new_diffs) #}}} + } #}}} + + # Create content of differential equations and build parameter list {{{ + for (varname in obs_vars) + { + # Get the name of the box(es) we are working on for the decline term(s) + box_1 = map[[varname]][[1]] # This is the only box unless type is SFORB + # Turn on sink if this is not explicitly excluded by the user by + # specifying sink=FALSE + if(is.null(spec[[varname]]$sink)) spec[[varname]]$sink <- TRUE + if(spec[[varname]]$type %in% c("SFO", "IORE", "SFORB")) { # {{{ Add decline term + if (use_of_ff == "min") { # Minimum use of formation fractions + if(spec[[varname]]$type == "IORE" && length(spec[[varname]]$to) > 0) { + stop("Transformation reactions from compounds modelled with IORE\n", + "are only supported with formation fractions (use_of_ff = 'max')") + } + if(spec[[varname]]$sink) { + # If sink is required, add first-order/IORE sink term + k_compound_sink <- paste("k", box_1, "sink", sep = "_") + if(spec[[varname]]$type == "IORE") { + k_compound_sink <- paste("k__iore", box_1, "sink", sep = "_") + } + parms <- c(parms, k_compound_sink) + decline_term <- paste(k_compound_sink, "*", box_1) + if(spec[[varname]]$type == "IORE") { + N <- paste("N", box_1, sep = "_") + parms <- c(parms, N) + decline_term <- paste0(decline_term, "^", N) + } + } else { # otherwise no decline term needed here + decline_term = "0" + } + } else { + k_compound <- paste("k", box_1, sep = "_") + if(spec[[varname]]$type == "IORE") { + k_compound <- paste("k__iore", box_1, sep = "_") + } + parms <- c(parms, k_compound) + decline_term <- paste(k_compound, "*", box_1) + if(spec[[varname]]$type == "IORE") { + N <- paste("N", box_1, sep = "_") + parms <- c(parms, N) + decline_term <- paste0(decline_term, "^", N) + } + } + } #}}} + if(spec[[varname]]$type == "FOMC") { # {{{ Add FOMC decline term + # From p. 53 of the FOCUS kinetics report, without the power function so it works in C + decline_term <- paste("(alpha/beta) * 1/((time/beta) + 1) *", box_1) + parms <- c(parms, "alpha", "beta") + } #}}} + if(spec[[varname]]$type == "DFOP") { # {{{ Add DFOP decline term + # From p. 57 of the FOCUS kinetics report + decline_term <- paste("((k1 * g * exp(-k1 * time) + k2 * (1 - g) * exp(-k2 * time)) / (g * exp(-k1 * time) + (1 - g) * exp(-k2 * time))) *", box_1) + parms <- c(parms, "k1", "k2", "g") + } #}}} + HS_decline <- "ifelse(time <= tb, k1, k2)" # Used below for automatic translation to C + if(spec[[varname]]$type == "HS") { # {{{ Add HS decline term + # From p. 55 of the FOCUS kinetics report + decline_term <- paste(HS_decline, "*", box_1) + parms <- c(parms, "k1", "k2", "tb") + } #}}} + if(spec[[varname]]$type == "logistic") { # {{{ Add logistic decline term + # From p. 67 of the FOCUS kinetics report (2014) + decline_term <- paste("(k0 * kmax)/(k0 + (kmax - k0) * exp(-r * time)) *", box_1) + parms <- c(parms, "kmax", "k0", "r") + } #}}} + # Add origin decline term to box 1 (usually the only box, unless type is SFORB)#{{{ + diffs[[box_1]] <- paste(diffs[[box_1]], "-", decline_term)#}}} + if(spec[[varname]]$type == "SFORB") { # {{{ Add SFORB reversible binding terms + box_2 = map[[varname]][[2]] + if (use_of_ff == "min") { # Minimum use of formation fractions + k_free_bound <- paste("k", varname, "free", "bound", sep = "_") + k_bound_free <- paste("k", varname, "bound", "free", sep = "_") + parms <- c(parms, k_free_bound, k_bound_free) + reversible_binding_term_1 <- paste("-", k_free_bound, "*", box_1, "+", + k_bound_free, "*", box_2) + reversible_binding_term_2 <- paste("+", k_free_bound, "*", box_1, "-", + k_bound_free, "*", box_2) + } else { # Use formation fractions also for the free compartment + stop("The maximum use of formation fractions is not supported for SFORB models") + # The problems were: Calculation of dissipation times did not work in this case + # and the coefficient matrix is not generated correctly by the code present + # in this file in this case + #f_free_bound <- paste("f", varname, "free", "bound", sep = "_") + #k_bound_free <- paste("k", varname, "bound", "free", sep = "_") + #parms <- c(parms, f_free_bound, k_bound_free) + #reversible_binding_term_1 <- paste("+", k_bound_free, "*", box_2) + #reversible_binding_term_2 <- paste("+", f_free_bound, "*", k_compound, "*", box_1, "-", + # k_bound_free, "*", box_2) + } + diffs[[box_1]] <- paste(diffs[[box_1]], reversible_binding_term_1) + diffs[[box_2]] <- paste(diffs[[box_2]], reversible_binding_term_2) + } #}}} + + # Transfer between compartments#{{{ + to <- spec[[varname]]$to + if(!is.null(to)) { + # Name of box from which transfer takes place + origin_box <- box_1 + + # Number of targets + n_targets = length(to) + + # Add transfer terms to listed compartments + for (target in to) { + if (!target %in% obs_vars) stop("You did not specify a submodel for target variable ", target) + target_box <- switch(spec[[target]]$type, + SFO = target, + IORE = target, + SFORB = paste(target, "free", sep = "_")) + if (use_of_ff == "min" && spec[[varname]]$type %in% c("SFO", "SFORB")) + { + k_from_to <- paste("k", origin_box, target_box, sep = "_") + parms <- c(parms, k_from_to) + diffs[[origin_box]] <- paste(diffs[[origin_box]], "-", + k_from_to, "*", origin_box) + diffs[[target_box]] <- paste(diffs[[target_box]], "+", + k_from_to, "*", origin_box) + } else { + # Do not introduce a formation fraction if this is the only target + if (spec[[origin_box]]$sink == FALSE && n_targets == 1) { + diffs[[target_box]] <- paste(diffs[[target_box]], "+", + decline_term) + } else { + fraction_to_target = paste("f", origin_box, "to", target, sep = "_") + parms <- c(parms, fraction_to_target) + diffs[[target_box]] <- paste(diffs[[target_box]], "+", + fraction_to_target, "*", decline_term) + } + } + } + } #}}} + } #}}} + + model <- list(diffs = diffs, parms = parms, map = map, spec = spec, use_of_ff = use_of_ff) + + # Create coefficient matrix if appropriate#{{{ + if (mat) { + boxes <- names(diffs) + n <- length(boxes) + m <- matrix(nrow=n, ncol=n, dimnames=list(boxes, boxes)) + + if (use_of_ff == "min") { # {{{ Minimum use of formation fractions + for (from in boxes) { + for (to in boxes) { + if (from == to) { # diagonal elements + k.candidate = paste("k", from, c(boxes, "sink"), sep = "_") + k.candidate = sub("free.*bound", "free_bound", k.candidate) + k.candidate = sub("bound.*free", "bound_free", k.candidate) + k.effective = intersect(model$parms, k.candidate) + m[from,to] = ifelse(length(k.effective) > 0, + paste("-", k.effective, collapse = " "), "0") + + } else { # off-diagonal elements + k.candidate = paste("k", from, to, sep = "_") + if (sub("_free$", "", from) == sub("_bound$", "", to)) { + k.candidate = paste("k", sub("_free$", "_free_bound", from), sep = "_") + } + if (sub("_bound$", "", from) == sub("_free$", "", to)) { + k.candidate = paste("k", sub("_bound$", "_bound_free", from), sep = "_") + } + k.effective = intersect(model$parms, k.candidate) + m[to, from] = ifelse(length(k.effective) > 0, + k.effective, "0") + } + } + } # }}} + } else { # {{{ Use formation fractions where possible + for (from in boxes) { + for (to in boxes) { + if (from == to) { # diagonal elements + k.candidate = paste("k", from, sep = "_") + m[from,to] = ifelse(k.candidate %in% model$parms, + paste("-", k.candidate), "0") + if(grepl("_free", from)) { # add transfer to bound compartment for SFORB + m[from,to] = paste(m[from,to], "-", paste("k", from, "bound", sep = "_")) + } + if(grepl("_bound", from)) { # add backtransfer to free compartment for SFORB + m[from,to] = paste("- k", from, "free", sep = "_") + } + m[from,to] = m[from,to] + } else { # off-diagonal elements + f.candidate = paste("f", from, "to", to, sep = "_") + k.candidate = paste("k", from, to, sep = "_") + # SFORB with maximum use of formation fractions not implemented, see above + m[to, from] = ifelse(f.candidate %in% model$parms, + paste(f.candidate, " * k_", from, sep = ""), + ifelse(k.candidate %in% model$parms, k.candidate, "0")) + # Special case: singular pathway and no sink + if (spec[[from]]$sink == FALSE && length(spec[[from]]$to) == 1 && to %in% spec[[from]]$to) { + m[to, from] = paste("k", from, sep = "_") + } + } + } + } + } # }}} + model$coefmat <- m + }#}}} + + # Try to create a function compiled from C code if more than one observed {{{ + # variable and gcc is available + if (length(obs_vars) > 1) { + if (Sys.which("gcc") != "") { + + # Translate the R code for the derivatives to C code + diffs.C <- paste(diffs, collapse = ";\n") + diffs.C <- paste0(diffs.C, ";") + + # HS + diffs.C <- gsub(HS_decline, "(time <= tb ? k1 : k2)", diffs.C, fixed = TRUE) + + for (i in seq_along(diffs)) { + state_var <- names(diffs)[i] + + # IORE + if (state_var %in% obs_vars) { + if (spec[[state_var]]$type == "IORE") { + diffs.C <- gsub(paste0(state_var, "^N_", state_var), + paste0("pow(y[", i - 1, "], N_", state_var, ")"), + diffs.C, fixed = TRUE) + } + } + + # Replace d_... terms by f[i-1] + # First line + pattern <- paste0("^d_", state_var) + replacement <- paste0("\nf[", i - 1, "]") + diffs.C <- gsub(pattern, replacement, diffs.C) + # Other lines + pattern <- paste0("\\nd_", state_var) + replacement <- paste0("\nf[", i - 1, "]") + diffs.C <- gsub(pattern, replacement, diffs.C) + + # Replace names of observed variables by y[i], + # making the implicit assumption that the observed variables only occur after "* " + pattern <- paste0("\\* ", state_var) + replacement <- paste0("* y[", i - 1, "]") + diffs.C <- gsub(pattern, replacement, diffs.C) + } + + derivs_sig <- signature(n = "integer", t = "numeric", y = "numeric", + f = "numeric", rpar = "numeric", ipar = "integer") + + # Declare the time variable in the body of the function if it is used + derivs_code <- if (spec[[1]]$type %in% c("FOMC", "DFOP", "HS")) { + paste0("double time = *t;\n", diffs.C) + } else { + diffs.C + } + + # Define the function initializing the parameters + npar <- length(parms) + initpar_code <- paste0( + "static double parms [", npar, "];\n", + paste0("#define ", parms, " parms[", 0:(npar - 1), "]\n", collapse = ""), + "\n", + "void initpar(void (* odeparms)(int *, double *)) {\n", + " int N = ", npar, ";\n", + " odeparms(&N, parms);\n", + "}\n\n") + + # Try to build a shared library + cf <- try(cfunction(list(func = derivs_sig), derivs_code, + otherdefs = initpar_code, + verbose = verbose, + convention = ".C", language = "C"), + silent = TRUE) + + if (!inherits(cf, "try-error")) { + if (!quiet) message("Successfully compiled differential equation model from auto-generated C code.") + model$cf <- cf + } + } + } + # }}} + + class(model) <- "mkinmod" + return(model) +} + +#' Print mkinmod objects +#' +#' Print mkinmod objects in a way that the user finds his way to get to its +#' components. +#' +#' @param x An \code{\link{mkinmod}} object. +#' @param \dots Not used. +#' @examples +#' +#' m_synth_SFO_lin <- mkinmod(parent = list(type = "SFO", to = "M1"), +#' M1 = list(type = "SFO", to = "M2"), +#' M2 = list(type = "SFO"), use_of_ff = "max") +#' +#' print(m_synth_SFO_lin) +#' +#' @export +print.mkinmod <- function(x, ...) { + cat(" model generated with\n") + cat("Use of formation fractions $use_of_ff:", x$use_of_ff, "\n") + cat("Specification $spec:\n") + for (obs in names(x$spec)) { + cat("$", obs, "\n", sep = "") + spl <- x$spec[[obs]] + cat("$type:", spl$type) + if (!is.null(spl$to) && length(spl$to)) cat("; $to: ", paste(spl$to, collapse = ", "), sep = "") + cat("; $sink: ", spl$sink, sep = "") + if (!is.null(spl$full_name)) if (!is.na(spl$full_name)) cat("; $full_name:", spl$full_name) + cat("\n") + } + if (is.matrix(x$coefmat)) cat("Coefficient matrix $coefmat available\n") + if (!is.null(x$cf)) cat("Compiled model $cf available\n") + cat("Differential equations:\n") + nice_diffs <- gsub("^(d.*) =", "\\1/dt =", x[["diffs"]]) + writeLines(strwrap(nice_diffs, exdent = 11)) +} +# vim: set foldmethod=marker ts=2 sw=2 expandtab: diff --git a/R/mkinparplot.R b/R/mkinparplot.R index af28e3a8..f9abab5b 100644 --- a/R/mkinparplot.R +++ b/R/mkinparplot.R @@ -1,20 +1,23 @@ -# Copyright (C) 2014 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see +#' Function to plot the confidence intervals obtained using mkinfit +#' +#' This function plots the confidence intervals for the parameters fitted using +#' \code{\link{mkinfit}}. +#' +#' @param object A fit represented in an \code{\link{mkinfit}} object. +#' @return Nothing is returned by this function, as it is called for its side +#' effect, namely to produce a plot. +#' @author Johannes Ranke +#' @examples +#' +#' \dontrun{ +#' model <- mkinmod( +#' T245 = mkinsub("SFO", to = c("phenol"), sink = FALSE), +#' phenol = mkinsub("SFO", to = c("anisole")), +#' anisole = mkinsub("SFO"), use_of_ff = "max") +#' fit <- mkinfit(model, subset(mccall81_245T, soil == "Commerce"), quiet = TRUE) +#' mkinparplot(fit) +#' } +#' @export mkinparplot <- function(object) { state.optim = rownames(subset(object$start, type == "state")) deparms.optim = rownames(subset(object$start, type == "deparm")) diff --git a/R/mkinplot.R b/R/mkinplot.R deleted file mode 100644 index b9becfdf..00000000 --- a/R/mkinplot.R +++ /dev/null @@ -1,4 +0,0 @@ -mkinplot <- function(fit, ...) -{ - plot(fit, ...) -} diff --git a/R/mkinpredict.R b/R/mkinpredict.R index c36d724a..8949e800 100644 --- a/R/mkinpredict.R +++ b/R/mkinpredict.R @@ -1,22 +1,99 @@ -# Copyright (C) 2010-2016,2018,2019 Johannes Ranke -# Some lines in this code are copyright (C) 2013 Eurofins Regulatory AG -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see - +#' Produce predictions from a kinetic model using specific parameters +#' +#' This function produces a time series for all the observed variables in a +#' kinetic model as specified by \code{\link{mkinmod}}, using a specific set of +#' kinetic parameters and initial values for the state variables. +#' +#' @aliases mkinpredict mkinpredict.mkinmod mkinpredict.mkinfit +#' @param x A kinetic model as produced by \code{\link{mkinmod}}, or a kinetic +#' fit as fitted by \code{\link{mkinfit}}. In the latter case, the fitted +#' parameters are used for the prediction. +#' @param odeparms A numeric vector specifying the parameters used in the +#' kinetic model, which is generally defined as a set of ordinary +#' differential equations. +#' @param odeini A numeric vectory containing the initial values of the state +#' variables of the model. Note that the state variables can differ from the +#' observed variables, for example in the case of the SFORB model. +#' @param outtimes A numeric vector specifying the time points for which model +#' predictions should be generated. +#' @param solution_type The method that should be used for producing the +#' predictions. This should generally be "analytical" if there is only one +#' observed variable, and usually "deSolve" in the case of several observed +#' variables. The third possibility "eigen" is faster but not applicable to +#' some models e.g. using FOMC for the parent compound. +#' @param method.ode The solution method passed via \code{\link{mkinpredict}} +#' to \code{\link{ode}} in case the solution type is "deSolve". The default +#' "lsoda" is performant, but sometimes fails to converge. +#' @param use_compiled If set to \code{FALSE}, no compiled version of the +#' \code{\link{mkinmod}} model is used, even if is present. +#' @param atol Absolute error tolerance, passed to \code{\link{ode}}. Default +#' is 1e-8, lower than in \code{\link{lsoda}}. +#' @param rtol Absolute error tolerance, passed to \code{\link{ode}}. Default +#' is 1e-10, much lower than in \code{\link{lsoda}}. +#' @param map_output Boolean to specify if the output should list values for +#' the observed variables (default) or for all state variables (if set to +#' FALSE). +#' @param \dots Further arguments passed to the ode solver in case such a +#' solver is used. +#' @import deSolve +#' @importFrom inline getDynLib +#' @return A matrix in the same format as the output of \code{\link{ode}}. +#' @author Johannes Ranke +#' @examples +#' +#' SFO <- mkinmod(degradinol = mkinsub("SFO")) +#' # Compare solution types +#' mkinpredict(SFO, c(k_degradinol_sink = 0.3), c(degradinol = 100), 0:20, +#' solution_type = "analytical") +#' mkinpredict(SFO, c(k_degradinol_sink = 0.3), c(degradinol = 100), 0:20, +#' solution_type = "deSolve") +#' mkinpredict(SFO, c(k_degradinol_sink = 0.3), c(degradinol = 100), 0:20, +#' solution_type = "deSolve", use_compiled = FALSE) +#' mkinpredict(SFO, c(k_degradinol_sink = 0.3), c(degradinol = 100), 0:20, +#' solution_type = "eigen") +#' +#' +#' # Compare integration methods to analytical solution +#' mkinpredict(SFO, c(k_degradinol_sink = 0.3), c(degradinol = 100), 0:20, +#' solution_type = "analytical")[21,] +#' mkinpredict(SFO, c(k_degradinol_sink = 0.3), c(degradinol = 100), 0:20, +#' method = "lsoda")[21,] +#' mkinpredict(SFO, c(k_degradinol_sink = 0.3), c(degradinol = 100), 0:20, +#' method = "ode45")[21,] +#' mkinpredict(SFO, c(k_degradinol_sink = 0.3), c(degradinol = 100), 0:20, +#' method = "rk4")[21,] +#' # rk4 is not as precise here +#' +#' # The number of output times used to make a lot of difference until the +#' # default for atol was adjusted +#' mkinpredict(SFO, c(k_degradinol_sink = 0.3), c(degradinol = 100), +#' seq(0, 20, by = 0.1))[201,] +#' mkinpredict(SFO, c(k_degradinol_sink = 0.3), c(degradinol = 100), +#' seq(0, 20, by = 0.01))[2001,] +#' +#' # Check compiled model versions - they are faster than the eigenvalue based solutions! +#' SFO_SFO = mkinmod(parent = list(type = "SFO", to = "m1"), +#' m1 = list(type = "SFO")) +#' system.time( +#' print(mkinpredict(SFO_SFO, c(k_parent_m1 = 0.05, k_parent_sink = 0.1, k_m1_sink = 0.01), +#' c(parent = 100, m1 = 0), seq(0, 20, by = 0.1), +#' solution_type = "eigen")[201,])) +#' system.time( +#' print(mkinpredict(SFO_SFO, c(k_parent_m1 = 0.05, k_parent_sink = 0.1, k_m1_sink = 0.01), +#' c(parent = 100, m1 = 0), seq(0, 20, by = 0.1), +#' solution_type = "deSolve")[201,])) +#' system.time( +#' print(mkinpredict(SFO_SFO, c(k_parent_m1 = 0.05, k_parent_sink = 0.1, k_m1_sink = 0.01), +#' c(parent = 100, m1 = 0), seq(0, 20, by = 0.1), +#' solution_type = "deSolve", use_compiled = FALSE)[201,])) +#' +#' \dontrun{ +#' # Predict from a fitted model +#' f <- mkinfit(SFO_SFO, FOCUS_2006_C) +#' head(mkinpredict(f)) +#' } +#' +#' @export mkinpredict <- function(x, odeparms, odeini, outtimes = seq(0, 120, by = 0.1), solution_type = "deSolve", @@ -27,6 +104,8 @@ mkinpredict <- function(x, odeparms, odeini, UseMethod("mkinpredict", x) } +#' @rdname mkinpredict +#' @export mkinpredict.mkinmod <- function(x, odeparms = c(k_parent_sink = 0.1), odeini = c(parent = 100), @@ -164,6 +243,8 @@ mkinpredict.mkinmod <- function(x, } } +#' @rdname mkinpredict +#' @export mkinpredict.mkinfit <- function(x, odeparms = x$bparms.ode, odeini = x$bparms.state, diff --git a/R/mkinresplot.R b/R/mkinresplot.R index 974d0549..5377dbf2 100644 --- a/R/mkinresplot.R +++ b/R/mkinresplot.R @@ -1,65 +1,84 @@ -# Copyright (C) 2008-2014,2019 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see -if(getRversion() >= '2.15.1') utils::globalVariables(c("variable", "residual")) - -mkinresplot <- function (object, - obs_vars = names(object$mkinmod$map), - xlim = c(0, 1.1 * max(object$data$time)), - xlab = "Time", ylab = "Residual", - maxabs = "auto", legend= TRUE, lpos = "topright", - col_obs = "auto", pch_obs = "auto", - frame = TRUE, - ...) -{ - obs_vars_all <- as.character(unique(object$data$variable)) - - if (length(obs_vars) > 0){ - obs_vars <- intersect(obs_vars_all, obs_vars) - } else obs_vars <- obs_vars_all - - residuals <- subset(object$data, variable %in% obs_vars, residual) - - if (maxabs == "auto") maxabs = max(abs(residuals), na.rm = TRUE) - - # Set colors and symbols - if (col_obs[1] == "auto") { - col_obs <- 1:length(obs_vars) - } - - if (pch_obs[1] == "auto") { - pch_obs <- 1:length(obs_vars) - } - names(col_obs) <- names(pch_obs) <- obs_vars - - plot(0, type = "n", frame = frame, - xlab = xlab, ylab = ylab, - xlim = xlim, - ylim = c(-1.2 * maxabs, 1.2 * maxabs), ...) - - for(obs_var in obs_vars){ - residuals_plot <- subset(object$data, variable == obs_var, c("time", "residual")) - points(residuals_plot, pch = pch_obs[obs_var], col = col_obs[obs_var]) - } - - abline(h = 0, lty = 2) - - if (legend == TRUE) { - legend(lpos, inset = c(0.05, 0.05), legend = obs_vars, - col = col_obs[obs_vars], pch = pch_obs[obs_vars]) - } -} +if(getRversion() >= '2.15.1') utils::globalVariables(c("variable", "residual")) + +#' Function to plot residuals stored in an mkin object +#' +#' This function plots the residuals for the specified subset of the observed +#' variables from an mkinfit object. A combined plot of the fitted model and +#' the residuals can be obtained using \code{\link{plot.mkinfit}} using the +#' argument \code{show_residuals = TRUE}. +#' +#' @param object A fit represented in an \code{\link{mkinfit}} object. +#' @param obs_vars A character vector of names of the observed variables for +#' which residuals should be plotted. Defaults to all observed variables in +#' the model +#' @param xlim plot range in x direction. +#' @param xlab Label for the x axis. Defaults to "Time [days]". +#' @param ylab Label for the y axis. Defaults to "Residual [\% of applied +#' radioactivity]". +#' @param maxabs Maximum absolute value of the residuals. This is used for the +#' scaling of the y axis and defaults to "auto". +#' @param legend Should a legend be plotted? Defaults to "TRUE". +#' @param lpos Where should the legend be placed? Default is "topright". Will +#' be passed on to \code{\link{legend}}. +#' @param col_obs Colors for the observed variables. +#' @param pch_obs Symbols to be used for the observed variables. +#' @param frame Should a frame be drawn around the plots? +#' @param \dots further arguments passed to \code{\link{plot}}. +#' @return Nothing is returned by this function, as it is called for its side +#' effect, namely to produce a plot. +#' @author Johannes Ranke +#' @seealso \code{\link{mkinplot}}, for a way to plot the data and the fitted +#' lines of the mkinfit object. +#' @examples +#' +#' model <- mkinmod(parent = mkinsub("SFO", "m1"), m1 = mkinsub("SFO")) +#' fit <- mkinfit(model, FOCUS_2006_D, quiet = TRUE) +#' mkinresplot(fit, "m1") +#' +#' @export +mkinresplot <- function (object, + obs_vars = names(object$mkinmod$map), + xlim = c(0, 1.1 * max(object$data$time)), + xlab = "Time", ylab = "Residual", + maxabs = "auto", legend= TRUE, lpos = "topright", + col_obs = "auto", pch_obs = "auto", + frame = TRUE, + ...) +{ + obs_vars_all <- as.character(unique(object$data$variable)) + + if (length(obs_vars) > 0){ + obs_vars <- intersect(obs_vars_all, obs_vars) + } else obs_vars <- obs_vars_all + + residuals <- subset(object$data, variable %in% obs_vars, residual) + + if (maxabs == "auto") maxabs = max(abs(residuals), na.rm = TRUE) + + # Set colors and symbols + if (col_obs[1] == "auto") { + col_obs <- 1:length(obs_vars) + } + + if (pch_obs[1] == "auto") { + pch_obs <- 1:length(obs_vars) + } + names(col_obs) <- names(pch_obs) <- obs_vars + + plot(0, type = "n", frame = frame, + xlab = xlab, ylab = ylab, + xlim = xlim, + ylim = c(-1.2 * maxabs, 1.2 * maxabs), ...) + + for(obs_var in obs_vars){ + residuals_plot <- subset(object$data, variable == obs_var, c("time", "residual")) + points(residuals_plot, pch = pch_obs[obs_var], col = col_obs[obs_var]) + } + + abline(h = 0, lty = 2) + + if (legend == TRUE) { + legend(lpos, inset = c(0.05, 0.05), legend = obs_vars, + col = col_obs[obs_vars], pch = pch_obs[obs_vars]) + } +} diff --git a/R/mkinsub.R b/R/mkinsub.R index 99c3ea20..db91ca00 100644 --- a/R/mkinsub.R +++ b/R/mkinsub.R @@ -1,23 +1,39 @@ -# Copyright (C) 2014,2015 Johannes Ranke -# Portions of this code are copyright (C) 2013 Eurofins Regulatory AG -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see +#' Function to set up a kinetic submodel for one state variable +#' +#' This is a convenience function to set up the lists used as arguments for +#' \code{\link{mkinmod}}. +#' +#' @param submodel Character vector of length one to specify the submodel type. +#' See \code{\link{mkinmod}} for the list of allowed submodel names. +#' @param to Vector of the names of the state variable to which a +#' transformation shall be included in the model. +#' @param sink Should a pathway to sink be included in the model in addition to +#' the pathways to other state variables? +#' @param full_name An optional name to be used e.g. for plotting fits +#' performed with the model. You can use non-ASCII characters here, but then +#' your R code will not be portable, \emph{i.e.} may produce unintended plot +#' results on other operating systems or system configurations. +#' @return A list for use with \code{\link{mkinmod}}. +#' @author Johannes Ranke +#' @examples +#' +#' # One parent compound, one metabolite, both single first order. +#' SFO_SFO <- mkinmod( +#' parent = list(type = "SFO", to = "m1"), +#' m1 = list(type = "SFO")) +#' +#' # The same model using mkinsub +#' SFO_SFO.2 <- mkinmod( +#' parent = mkinsub("SFO", "m1"), +#' m1 = mkinsub("SFO")) +#' +#' # Now supplying full names +#' SFO_SFO.2 <- mkinmod( +#' parent = mkinsub("SFO", "m1", full_name = "Test compound"), +#' m1 = mkinsub("SFO", full_name = "Metabolite M1")) +#' +#' @export mkinsub <- function(submodel, to = NULL, sink = TRUE, full_name = NA) { return(list(type = submodel, to = to, sink = sink, full_name = full_name)) } -# vim: set ts=2 sw=2 expandtab: diff --git a/R/mmkin.R b/R/mmkin.R index b713ae74..4f3f28a9 100644 --- a/R/mmkin.R +++ b/R/mmkin.R @@ -1,24 +1,65 @@ -# Copyright (C) 2015,2019 Johannes Ranke -# Contact: jranke@uni-bremen.de -# The summary function is an adapted and extended version of summary.modFit -# from the FME package, v 1.1 by Soetart and Petzoldt, which was in turn -# inspired by summary.nls.lm - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see - +#' Fit one or more kinetic models with one or more state variables to one or +#' more datasets +#' +#' This function calls \code{\link{mkinfit}} on all combinations of models and +#' datasets specified in its first two arguments. +#' +#' @param models Either a character vector of shorthand names like +#' \code{c("SFO", "FOMC", "DFOP", "HS", "SFORB")}, or an optionally named +#' list of \code{\link{mkinmod}} objects. +#' @param datasets An optionally named list of datasets suitable as observed +#' data for \code{\link{mkinfit}}. +#' @param cores The number of cores to be used for multicore processing. This +#' is only used when the \code{cluster} argument is \code{NULL}. On Windows +#' machines, cores > 1 is not supported, you need to use the \code{cluster} +#' argument to use multiple logical processors. +#' @param cluster A cluster as returned by \code{\link{makeCluster}} to be used +#' for parallel execution. +#' @param \dots Further arguments that will be passed to \code{\link{mkinfit}}. +#' @importFrom parallel mclapply parLapply detectCores +#' @return A matrix of \code{\link{mkinfit}} objects that can be indexed using +#' the model and dataset names as row and column indices. +#' @author Johannes Ranke +#' @seealso \code{\link{[.mmkin}} for subsetting, \code{\link{plot.mmkin}} for +#' plotting. +#' @keywords optimize +#' @examples +#' +#' \dontrun{ +#' m_synth_SFO_lin <- mkinmod(parent = mkinsub("SFO", "M1"), +#' M1 = mkinsub("SFO", "M2"), +#' M2 = mkinsub("SFO"), use_of_ff = "max") +#' +#' m_synth_FOMC_lin <- mkinmod(parent = mkinsub("FOMC", "M1"), +#' M1 = mkinsub("SFO", "M2"), +#' M2 = mkinsub("SFO"), use_of_ff = "max") +#' +#' models <- list(SFO_lin = m_synth_SFO_lin, FOMC_lin = m_synth_FOMC_lin) +#' datasets <- lapply(synthetic_data_for_UBA_2014[1:3], function(x) x$data) +#' names(datasets) <- paste("Dataset", 1:3) +#' +#' time_default <- system.time(fits.0 <- mmkin(models, datasets, quiet = TRUE)) +#' time_1 <- system.time(fits.4 <- mmkin(models, datasets, cores = 1, quiet = TRUE)) +#' +#' time_default +#' time_1 +#' +#' endpoints(fits.0[["SFO_lin", 2]]) +#' +#' # plot.mkinfit handles rows or columns of mmkin result objects +#' plot(fits.0[1, ]) +#' plot(fits.0[1, ], obs_var = c("M1", "M2")) +#' plot(fits.0[, 1]) +#' # Use double brackets to extract a single mkinfit object, which will be plotted +#' # by plot.mkinfit and can be plotted using plot_sep +#' plot(fits.0[[1, 1]], sep_obs = TRUE, show_residuals = TRUE, show_errmin = TRUE) +#' plot_sep(fits.0[[1, 1]]) +#' # Plotting with mmkin (single brackets, extracting an mmkin object) does not +#' # allow to plot the observed variables separately +#' plot(fits.0[1, 1]) +#' } +#' +#' @export mmkin mmkin <- function(models = c("SFO", "FOMC", "DFOP"), datasets, cores = round(detectCores()/2), cluster = NULL, ...) { @@ -66,7 +107,35 @@ mmkin <- function(models = c("SFO", "FOMC", "DFOP"), datasets, return(results) } -"[.mmkin" <- function(x, i, j, ..., drop = FALSE) { +#' Subsetting method for mmkin objects +#' +#' Subsetting method for mmkin objects. +#' +#' @param x An \code{\link{mmkin} object} +#' @param i Row index selecting the fits for specific models +#' @param j Column index selecting the fits to specific datasets +#' @param ... Not used, only there to satisfy the generic method definition +#' @param drop If FALSE, the method always returns an mmkin object, otherwise +#' either a list of mkinfit objects or a single mkinfit object. +#' @return An object of class \code{\link{mmkin}}. +#' @author Johannes Ranke +#' @rdname Extract.mmkin +#' @examples +#' +#' # Only use one core, to pass R CMD check --as-cran +#' fits <- mmkin(c("SFO", "FOMC"), list(B = FOCUS_2006_B, C = FOCUS_2006_C), +#' cores = 1, quiet = TRUE) +#' fits["FOMC", ] +#' fits[, "B"] +#' fits["SFO", "B"] +#' +#' head( +#' # This extracts an mkinfit object with lots of components +#' fits[["FOMC", "B"]] +#' ) +#' +#' @export +`[.mmkin` <- function(x, i, j, ..., drop = FALSE) { class(x) <- NULL x_sub <- x[i, j, drop = drop] if (!drop) class(x_sub) <- "mmkin" diff --git a/R/nafta.R b/R/nafta.R index ef752e0c..7e5873d8 100644 --- a/R/nafta.R +++ b/R/nafta.R @@ -1,21 +1,40 @@ -# Copyright (C) 2019 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see - +#' Evaluate parent kinetics using the NAFTA guidance +#' +#' The function fits the SFO, IORE and DFOP models using \code{\link{mmkin}} +#' and returns an object of class \code{nafta} that has methods for printing +#' and plotting. +#' +#' @param ds A dataframe that must contain one variable called "time" with the +#' time values specified by the \code{time} argument, one column called +#' "name" with the grouping of the observed values, and finally one column of +#' observed values called "value". +#' @param title Optional title of the dataset +#' @param quiet Should the evaluation text be shown? +#' @param \dots Further arguments passed to \code{\link{mmkin}} (not for the +#' printing method). +#' @importFrom stats qf +#' @return An list of class \code{nafta}. The list element named "mmkin" is the +#' \code{\link{mmkin}} object containing the fits of the three models. The +#' list element named "title" contains the title of the dataset used. The +#' list element "data" contains the dataset used in the fits. +#' @author Johannes Ranke +#' @source NAFTA (2011) Guidance for evaluating and calculating degradation +#' kinetics in environmental media. NAFTA Technical Working Group on +#' Pesticides +#' \url{https://www.epa.gov/pesticide-science-and-assessing-pesticide-risks/guidance-evaluating-and-calculating-degradation} +#' accessed 2019-02-22 +#' +#' US EPA (2015) Standard Operating Procedure for Using the NAFTA Guidance to +#' Calculate Representative Half-life Values and Characterizing Pesticide +#' Degradation +#' \url{https://www.epa.gov/pesticide-science-and-assessing-pesticide-risks/standard-operating-procedure-using-nafta-guidance} +#' @examples +#' +#' nafta_evaluation <- nafta(NAFTA_SOP_Appendix_D, cores = 1) +#' print(nafta_evaluation) +#' plot(nafta_evaluation) +#' +#' @export nafta <- function(ds, title = NA, quiet = FALSE, ...) { if (length(levels(ds$name)) > 1) { stop("The NAFTA procedure is only defined for decline data for a single compound") @@ -56,6 +75,20 @@ nafta <- function(ds, title = NA, quiet = FALSE, ...) { return(result) } +#' Plot the results of the three models used in the NAFTA scheme. +#' +#' The plots are ordered with increasing complexity of the model in this +#' function (SFO, then IORE, then DFOP). +#' +#' Calls \code{\link{plot.mmkin}}. +#' +#' @param x An object of class \code{\link{nafta}}. +#' @param legend Should a legend be added? +#' @param main Possibility to override the main title of the plot. +#' @param \dots Further arguments passed to \code{\link{plot.mmkin}}. +#' @return The function is called for its side effect. +#' @author Johannes Ranke +#' @export plot.nafta <- function(x, legend = FALSE, main = "auto", ...) { if (main == "auto") { if (is.na(x$title)) main = "" @@ -64,6 +97,16 @@ plot.nafta <- function(x, legend = FALSE, main = "auto", ...) { plot(x$mmkin, ..., legend = legend, main = main) } +#' Print nafta objects +#' +#' Print nafta objects. The results for the three models are printed in the +#' order of increasing model complexity, i.e. SFO, then IORE, and finally DFOP. +#' +#' @param x An \code{\link{nafta}} object. +#' @param digits Number of digits to be used for printing parameters and +#' dissipation times. +#' @rdname nafta +#' @export print.nafta <- function(x, quiet = TRUE, digits = 3, ...) { cat("Sums of squares:\n") print(x$S) diff --git a/R/plot.mkinfit.R b/R/plot.mkinfit.R index e39da416..16415a3b 100644 --- a/R/plot.mkinfit.R +++ b/R/plot.mkinfit.R @@ -1,22 +1,88 @@ -# Copyright (C) 2010-2016,2019 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see if(getRversion() >= '2.15.1') utils::globalVariables(c("type", "variable", "observed")) +#' Plot the observed data and the fitted model of an mkinfit object +#' +#' Solves the differential equations with the optimised and fixed parameters +#' from a previous successful call to \code{\link{mkinfit}} and plots the +#' observed data together with the solution of the fitted model. +#' +#' If the current plot device is a \code{\link[tikzDevice]{tikz}} device, then +#' latex is being used for the formatting of the chi2 error level, if +#' \code{show_errmin = TRUE}. +#' +#' @aliases plot.mkinfit plot_sep plot_res plot_err +#' @param x Alias for fit introduced for compatibility with the generic S3 +#' method. +#' @param fit An object of class \code{\link{mkinfit}}. +#' @param obs_vars A character vector of names of the observed variables for +#' which the data and the model should be plotted. Defauls to all observed +#' variables in the model. +#' @param xlab Label for the x axis. +#' @param ylab Label for the y axis. +#' @param xlim Plot range in x direction. +#' @param ylim Plot range in y direction. +#' @param col_obs Colors used for plotting the observed data and the +#' corresponding model prediction lines. +#' @param pch_obs Symbols to be used for plotting the data. +#' @param lty_obs Line types to be used for the model predictions. +#' @param add Should the plot be added to an existing plot? +#' @param legend Should a legend be added to the plot? +#' @param show_residuals Should residuals be shown? If only one plot of the +#' fits is shown, the residual plot is in the lower third of the plot. +#' Otherwise, i.e. if "sep_obs" is given, the residual plots will be located +#' to the right of the plots of the fitted curves. +#' @param show_errplot Should squared residuals and the error model be shown? +#' If only one plot of the fits is shown, this plot is in the lower third of +#' the plot. Otherwise, i.e. if "sep_obs" is given, the residual plots will +#' be located to the right of the plots of the fitted curves. +#' @param maxabs Maximum absolute value of the residuals. This is used for the +#' scaling of the y axis and defaults to "auto". +#' @param sep_obs Should the observed variables be shown in separate subplots? +#' If yes, residual plots requested by "show_residuals" will be shown next +#' to, not below the plot of the fits. +#' @param rel.height.middle The relative height of the middle plot, if more +#' than two rows of plots are shown. +#' @param row_layout Should we use a row layout where the residual plot or the +#' error model plot is shown to the right? +#' @param lpos Position(s) of the legend(s). Passed to \code{\link{legend}} as +#' the first argument. If not length one, this should be of the same length +#' as the obs_var argument. +#' @param inset Passed to \code{\link{legend}} if applicable. +#' @param show_errmin Should the FOCUS chi2 error value be shown in the upper +#' margin of the plot? +#' @param errmin_digits The number of significant digits for rounding the FOCUS +#' chi2 error percentage. +#' @param frame Should a frame be drawn around the plots? +#' @param \dots Further arguments passed to \code{\link{plot}}. +#' @import graphics +#' @importFrom grDevices dev.cur +#' @return The function is called for its side effect. +#' @author Johannes Ranke +#' @examples +#' +#' # One parent compound, one metabolite, both single first order, path from +#' # parent to sink included +#' \dontrun{ +#' SFO_SFO <- mkinmod(parent = mkinsub("SFO", "m1", full = "Parent"), +#' m1 = mkinsub("SFO", full = "Metabolite M1" )) +#' fit <- mkinfit(SFO_SFO, FOCUS_2006_D, quiet = TRUE, error_model = "tc") +#' plot(fit) +#' plot_res(fit) +#' plot_err(fit) +#' +#' # Show the observed variables separately, with residuals +#' plot(fit, sep_obs = TRUE, show_residuals = TRUE, lpos = c("topright", "bottomright"), +#' show_errmin = TRUE) +#' +#' # The same can be obtained with less typing, using the convenience function plot_sep +#' plot_sep(fit, lpos = c("topright", "bottomright")) +#' +#' # Show the observed variables separately, with the error model +#' plot(fit, sep_obs = TRUE, show_errplot = TRUE, lpos = c("topright", "bottomright"), +#' show_errmin = TRUE) +#' } +#' +#' @export plot.mkinfit <- function(x, fit = x, obs_vars = names(fit$mkinmod$map), xlab = "Time", ylab = "Observed", @@ -206,17 +272,39 @@ plot.mkinfit <- function(x, fit = x, } if (do_layout) par(oldpar, no.readonly = TRUE) } -# Convenience function for switching on some features of mkinfit -# that have not been made the default to keep compatibility + +#' @rdname plot.mkinfit +#' @export plot_sep <- function(fit, show_errmin = TRUE, ...) { plot.mkinfit(fit, sep_obs = TRUE, show_residuals = TRUE, show_errmin = show_errmin, ...) } + +#' @rdname plot.mkinfit +#' @export plot_res <- function(fit, sep_obs = FALSE, show_errmin = sep_obs, ...) { plot.mkinfit(fit, sep_obs = sep_obs, show_errmin = show_errmin, show_residuals = TRUE, row_layout = TRUE, ...) } + +#' @rdname plot.mkinfit +#' @export plot_err <- function(fit, sep_obs = FALSE, show_errmin = sep_obs, ...) { plot.mkinfit(fit, sep_obs = sep_obs, show_errmin = show_errmin, show_errplot = TRUE, row_layout = TRUE, ...) } + +#' Plot the observed data and the fitted model of an mkinfit object +#' +#' Deprecated function. It now only calls the plot method +#' \code{\link{plot.mkinfit}}. +#' +#' @param fit an object of class \code{\link{mkinfit}}. +#' @param \dots further arguments passed to \code{\link{plot.mkinfit}}. +#' @return The function is called for its side effect. +#' @author Johannes Ranke +#' @export +mkinplot <- function(fit, ...) +{ + plot(fit, ...) +} diff --git a/R/plot.mmkin.R b/R/plot.mmkin.R index ef80949c..eefafe12 100644 --- a/R/plot.mmkin.R +++ b/R/plot.mmkin.R @@ -1,21 +1,53 @@ -# Copyright (C) 2015-2016,2019 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see - +#' Plot model fits (observed and fitted) and the residuals for a row or column +#' of an mmkin object +#' +#' When x is a row selected from an mmkin object (\code{\link{[.mmkin}}), the +#' same model fitted for at least one dataset is shown. When it is a column, +#' the fit of at least one model to the same dataset is shown. +#' +#' If the current plot device is a \code{\link[tikzDevice]{tikz}} device, then +#' latex is being used for the formatting of the chi2 error level. +#' +#' @param x An object of class \code{\link{mmkin}}, with either one row or one +#' column. +#' @param main The main title placed on the outer margin of the plot. +#' @param legends An index for the fits for which legends should be shown. +#' @param resplot Should the residuals plotted against time, using +#' \code{\link{mkinresplot}}, or as squared residuals against predicted +#' values, with the error model, using \code{\link{mkinerrplot}}. +#' @param show_errmin Should the chi2 error level be shown on top of the plots +#' to the left? +#' @param errmin_var The variable for which the FOCUS chi2 error value should +#' be shown. +#' @param errmin_digits The number of significant digits for rounding the FOCUS +#' chi2 error percentage. +#' @param cex Passed to the plot functions and \code{\link{mtext}}. +#' @param rel.height.middle The relative height of the middle plot, if more +#' than two rows of plots are shown. +#' @param \dots Further arguments passed to \code{\link{plot.mkinfit}} and +#' \code{\link{mkinresplot}}. +#' @return The function is called for its side effect. +#' @author Johannes Ranke +#' @examples +#' +#' \dontrun{ +#' # Only use one core not to offend CRAN checks +#' fits <- mmkin(c("FOMC", "HS"), +#' list("FOCUS B" = FOCUS_2006_B, "FOCUS C" = FOCUS_2006_C), # named list for titles +#' cores = 1, quiet = TRUE, error_model = "tc") +#' plot(fits[, "FOCUS C"]) +#' plot(fits["FOMC", ]) +#' +#' # We can also plot a single fit, if we like the way plot.mmkin works, but then the plot +#' # height should be smaller than the plot width (this is not possible for the html pages +#' # generated by pkgdown, as far as I know). +#' plot(fits["FOMC", "FOCUS C"]) # same as plot(fits[1, 2]) +#' +#' # Show the error models +#' plot(fits["FOMC", ], resplot = "errmod") +#' } +#' +#' @export plot.mmkin <- function(x, main = "auto", legends = 1, resplot = c("time", "errmod"), show_errmin = TRUE, diff --git a/R/sigma_twocomp.R b/R/sigma_twocomp.R index b06816c1..c9a15aa8 100644 --- a/R/sigma_twocomp.R +++ b/R/sigma_twocomp.R @@ -1,3 +1,29 @@ +#' Two component error model +#' +#' Function describing the standard deviation of the measurement error in +#' dependence of the measured value \eqn{y}: +#' +#' \deqn{\sigma = \sqrt{ \sigma_{low}^2 + y^2 * {rsd}_{high}^2}} sigma = +#' sqrt(sigma_low^2 + y^2 * rsd_high^2) +#' +#' This is the error model used for example by Werner et al. (1978). The model +#' proposed by Rocke and Lorenzato (1995) can be written in this form as well, +#' but assumes approximate lognormal distribution of errors for high values of +#' y. +#' +#' @param y The magnitude of the observed value +#' @param sigma_low The asymptotic minimum of the standard deviation for low +#' observed values +#' @param rsd_high The coefficient describing the increase of the standard +#' deviation with the magnitude of the observed value +#' @return The standard deviation of the response variable. +#' @references Werner, Mario, Brooks, Samuel H., and Knott, Lancaster B. (1978) +#' Additive, Multiplicative, and Mixed Analytical Errors. Clinical Chemistry +#' 24(11), 1895-1898. +#' +#' Rocke, David M. and Lorenzato, Stefan (1995) A two-component model for +#' measurement error in analytical chemistry. Technometrics 37(2), 176-184. +#' @export sigma_twocomp <- function(y, sigma_low, rsd_high) { sqrt(sigma_low^2 + y^2 * rsd_high^2) } diff --git a/R/summary.mkinfit.R b/R/summary.mkinfit.R new file mode 100644 index 00000000..90f32da9 --- /dev/null +++ b/R/summary.mkinfit.R @@ -0,0 +1,272 @@ +#' Summary method for class "mkinfit" +#' +#' Lists model equations, initial parameter values, optimised parameters with +#' some uncertainty statistics, the chi2 error levels calculated according to +#' FOCUS guidance (2006) as defined therein, formation fractions, DT50 values +#' and optionally the data, consisting of observed, predicted and residual +#' values. +#' +#' @param object an object of class \code{\link{mkinfit}}. +#' @param x an object of class \code{summary.mkinfit}. +#' @param data logical, indicating whether the data should be included in the +#' summary. +#' @param distimes logical, indicating whether DT50 and DT90 values should be +#' included. +#' @param alpha error level for confidence interval estimation from t +#' distribution +#' @param digits Number of digits to use for printing +#' @param \dots optional arguments passed to methods like \code{print}. +#' @importFrom stats qt pt cov2cor +#' @return The summary function returns a list with components, among others +#' \item{version, Rversion}{The mkin and R versions used} +#' \item{date.fit, date.summary}{The dates where the fit and the summary were +#' produced} +#' \item{diffs}{The differential equations used in the model} +#' \item{use_of_ff}{Was maximum or minimum use made of formation fractions} +#' \item{bpar}{Optimised and backtransformed +#' parameters} +#' \item{data}{The data (see Description above).} +#' \item{start}{The starting values and bounds, if applicable, for optimised +#' parameters.} +#' \item{fixed}{The values of fixed parameters.} +#' \item{errmin }{The chi2 error levels for +#' each observed variable.} +#' \item{bparms.ode}{All backtransformed ODE +#' parameters, for use as starting parameters for related models.} +#' \item{errparms}{Error model parameters.} +#' \item{ff}{The estimated formation fractions derived from the fitted +#' model.} +#' \item{distimes}{The DT50 and DT90 values for each observed variable.} +#' \item{SFORB}{If applicable, eigenvalues of SFORB components of the model.} +#' The print method is called for its side effect, i.e. printing the summary. +#' @author Johannes Ranke +#' @references FOCUS (2006) \dQuote{Guidance Document on Estimating Persistence +#' and Degradation Kinetics from Environmental Fate Studies on Pesticides in +#' EU Registration} Report of the FOCUS Work Group on Degradation Kinetics, +#' EC Document Reference Sanco/10058/2005 version 2.0, 434 pp, +#' \url{http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics} +#' @examples +#' +#' summary(mkinfit(mkinmod(parent = mkinsub("SFO")), FOCUS_2006_A, quiet = TRUE)) +#' +#' @export +summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, alpha = 0.05, ...) { + param <- object$par + pnames <- names(param) + bpnames <- names(object$bparms.optim) + epnames <- names(object$errparms) + p <- length(param) + mod_vars <- names(object$mkinmod$diffs) + covar <- try(solve(object$hessian), silent = TRUE) + covar_notrans <- try(solve(object$hessian_notrans), silent = TRUE) + rdf <- object$df.residual + + if (!is.numeric(covar) | is.na(covar[1])) { + covar <- NULL + se <- lci <- uci <- rep(NA, p) + } else { + rownames(covar) <- colnames(covar) <- pnames + se <- sqrt(diag(covar)) + lci <- param + qt(alpha/2, rdf) * se + uci <- param + qt(1-alpha/2, rdf) * se + } + + beparms.optim <- c(object$bparms.optim, object$par[epnames]) + if (!is.numeric(covar_notrans) | is.na(covar_notrans[1])) { + covar_notrans <- NULL + se_notrans <- tval <- pval <- rep(NA, p) + } else { + rownames(covar_notrans) <- colnames(covar_notrans) <- c(bpnames, epnames) + se_notrans <- sqrt(diag(covar_notrans)) + tval <- beparms.optim / se_notrans + pval <- pt(abs(tval), rdf, lower.tail = FALSE) + } + + names(se) <- pnames + + param <- cbind(param, se, lci, uci) + dimnames(param) <- list(pnames, c("Estimate", "Std. Error", "Lower", "Upper")) + + bparam <- cbind(Estimate = beparms.optim, se_notrans, + "t value" = tval, "Pr(>t)" = pval, Lower = NA, Upper = NA) + + # Transform boundaries of CI for one parameter at a time, + # with the exception of sets of formation fractions (single fractions are OK). + f_names_skip <- character(0) + for (box in mod_vars) { # Figure out sets of fractions to skip + f_names <- grep(paste("^f", box, sep = "_"), pnames, value = TRUE) + n_paths <- length(f_names) + if (n_paths > 1) f_names_skip <- c(f_names_skip, f_names) + } + + for (pname in pnames) { + if (!pname %in% f_names_skip) { + par.lower <- param[pname, "Lower"] + par.upper <- param[pname, "Upper"] + names(par.lower) <- names(par.upper) <- pname + bpl <- backtransform_odeparms(par.lower, object$mkinmod, + object$transform_rates, + object$transform_fractions) + bpu <- backtransform_odeparms(par.upper, object$mkinmod, + object$transform_rates, + object$transform_fractions) + bparam[names(bpl), "Lower"] <- bpl + bparam[names(bpu), "Upper"] <- bpu + } + } + bparam[epnames, c("Lower", "Upper")] <- param[epnames, c("Lower", "Upper")] + + ans <- list( + version = as.character(utils::packageVersion("mkin")), + Rversion = paste(R.version$major, R.version$minor, sep="."), + date.fit = object$date, + date.summary = date(), + solution_type = object$solution_type, + warning = object$warning, + use_of_ff = object$mkinmod$use_of_ff, + error_model_algorithm = object$error_model_algorithm, + df = c(p, rdf), + covar = covar, + covar_notrans = covar_notrans, + err_mod = object$err_mod, + niter = object$iterations, + calls = object$calls, + time = object$time, + par = param, + bpar = bparam) + + if (!is.null(object$version)) { + ans$fit_version <- object$version + ans$fit_Rversion <- object$Rversion + } + + ans$diffs <- object$mkinmod$diffs + if(data) ans$data <- object$data + ans$start <- object$start + ans$start_transformed <- object$start_transformed + + ans$fixed <- object$fixed + + ans$errmin <- mkinerrmin(object, alpha = 0.05) + + if (object$calls > 0) { + if (!is.null(ans$covar)){ + Corr <- cov2cor(ans$covar) + rownames(Corr) <- colnames(Corr) <- rownames(ans$par) + ans$Corr <- Corr + } else { + warning("Could not calculate correlation; no covariance matrix") + } + } + + ans$bparms.ode <- object$bparms.ode + ep <- endpoints(object) + if (length(ep$ff) != 0) + ans$ff <- ep$ff + if (distimes) ans$distimes <- ep$distimes + if (length(ep$SFORB) != 0) ans$SFORB <- ep$SFORB + if (!is.null(object$d_3_message)) ans$d_3_message <- object$d_3_message + class(ans) <- c("summary.mkinfit", "summary.modFit") + return(ans) +} + +#' @rdname summary.mkinfit +#' @export +print.summary.mkinfit <- function(x, digits = max(3, getOption("digits") - 3), ...) { + if (is.null(x$fit_version)) { + cat("mkin version: ", x$version, "\n") + cat("R version: ", x$Rversion, "\n") + } else { + cat("mkin version used for fitting: ", x$fit_version, "\n") + cat("R version used for fitting: ", x$fit_Rversion, "\n") + } + + cat("Date of fit: ", x$date.fit, "\n") + cat("Date of summary:", x$date.summary, "\n") + + if (!is.null(x$warning)) cat("\n\nWarning:", x$warning, "\n\n") + + cat("\nEquations:\n") + nice_diffs <- gsub("^(d.*) =", "\\1/dt =", x[["diffs"]]) + writeLines(strwrap(nice_diffs, exdent = 11)) + df <- x$df + rdf <- df[2] + + cat("\nModel predictions using solution type", x$solution_type, "\n") + + cat("\nFitted using", x$calls, "model solutions performed in", x$time[["elapsed"]], "s\n") + + if (!is.null(x$err_mod)) { + cat("\nError model: ") + cat(switch(x$err_mod, + const = "Constant variance", + obs = "Variance unique to each observed variable", + tc = "Two-component variance function"), "\n") + + cat("\nError model algorithm:", x$error_model_algorithm, "\n") + if (!is.null(x$d_3_message)) cat(x$d_3_message, "\n") + } + + cat("\nStarting values for parameters to be optimised:\n") + print(x$start) + + cat("\nStarting values for the transformed parameters actually optimised:\n") + print(x$start_transformed) + + cat("\nFixed parameter values:\n") + if(length(x$fixed$value) == 0) cat("None\n") + else print(x$fixed) + + cat("\nOptimised, transformed parameters with symmetric confidence intervals:\n") + print(signif(x$par, digits = digits)) + + if (x$calls > 0) { + cat("\nParameter correlation:\n") + if (!is.null(x$covar)){ + print(x$Corr, digits = digits, ...) + } else { + cat("No covariance matrix") + } + } + + cat("\nBacktransformed parameters:\n") + cat("Confidence intervals for internally transformed parameters are asymmetric.\n") + if ((x$version) < "0.9-36") { + cat("To get the usual (questionable) t-test, upgrade mkin and repeat the fit.\n") + print(signif(x$bpar, digits = digits)) + } else { + cat("t-test (unrealistically) based on the assumption of normal distribution\n") + cat("for estimators of untransformed parameters.\n") + print(signif(x$bpar[, c(1, 3, 4, 5, 6)], digits = digits)) + } + + cat("\nFOCUS Chi2 error levels in percent:\n") + x$errmin$err.min <- 100 * x$errmin$err.min + print(x$errmin, digits=digits,...) + + printSFORB <- !is.null(x$SFORB) + if(printSFORB){ + cat("\nEstimated Eigenvalues of SFORB model(s):\n") + print(x$SFORB, digits=digits,...) + } + + printff <- !is.null(x$ff) + if(printff){ + cat("\nResulting formation fractions:\n") + print(data.frame(ff = x$ff), digits=digits,...) + } + + printdistimes <- !is.null(x$distimes) + if(printdistimes){ + cat("\nEstimated disappearance times:\n") + print(x$distimes, digits=digits,...) + } + + printdata <- !is.null(x$data) + if (printdata){ + cat("\nData:\n") + print(format(x$data, digits = digits, ...), row.names = FALSE) + } + + invisible(x) +} diff --git a/R/transform_odeparms.R b/R/transform_odeparms.R index f69f4ebd..28e58f87 100644 --- a/R/transform_odeparms.R +++ b/R/transform_odeparms.R @@ -1,180 +1,258 @@ -# Copyright (C) 2010-2014,2019 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see - -transform_odeparms <- function(parms, mkinmod, - transform_rates = TRUE, - transform_fractions = TRUE) -{ - # We need the model specification for the names of the model - # variables and the information on the sink - spec = mkinmod$spec - - # Set up container for transformed parameters - transparms <- numeric(0) - - # Do not transform initial values for state variables - state.ini.optim <- parms[grep("_0$", names(parms))] - transparms[names(state.ini.optim)] <- state.ini.optim - - # Log transformation for rate constants if requested - k <- parms[grep("^k_", names(parms))] - k__iore <- parms[grep("^k__iore_", names(parms))] - k <- c(k, k__iore) - if (length(k) > 0) { - if(transform_rates) { - transparms[paste0("log_", names(k))] <- log(k) - } else transparms[names(k)] <- k - } - - # Do not transform exponents in IORE models - N <- parms[grep("^N", names(parms))] - transparms[names(N)] <- N - - # Go through state variables and apply isometric logratio transformation to - # formation fractions if requested - mod_vars = names(spec) - for (box in mod_vars) { - f <- parms[grep(paste("^f", box, sep = "_"), names(parms))] - - if (length(f) > 0) { - if(transform_fractions) { - if (spec[[box]]$sink) { - trans_f <- ilr(c(f, 1 - sum(f))) - trans_f_names <- paste("f", box, "ilr", 1:length(trans_f), sep = "_") - transparms[trans_f_names] <- trans_f - } else { - if (length(f) > 1) { - trans_f <- ilr(f) - trans_f_names <- paste("f", box, "ilr", 1:length(trans_f), sep = "_") - transparms[trans_f_names] <- trans_f - } - } - } else { - transparms[names(f)] <- f - } - } - } - - # Transform also FOMC parameters alpha and beta, DFOP and HS rates k1 and k2 - # and HS parameter tb as well as logistic model parameters kmax, k0 and r if - # transformation of rates is requested - for (pname in c("alpha", "beta", "k1", "k2", "tb", "kmax", "k0", "r")) { - if (!is.na(parms[pname])) { - if (transform_rates) { - transparms[paste0("log_", pname)] <- log(parms[pname]) - } else { - transparms[pname] <- parms[pname] - } - } - } - - # DFOP parameter g is treated as a fraction - if (!is.na(parms["g"])) { - g <- parms["g"] - if (transform_fractions) { - transparms["g_ilr"] <- ilr(c(g, 1 - g)) - } else { - transparms["g"] <- g - } - } - - return(transparms) -} - -backtransform_odeparms <- function(transparms, mkinmod, - transform_rates = TRUE, - transform_fractions = TRUE) -{ - # We need the model specification for the names of the model - # variables and the information on the sink - spec = mkinmod$spec - - # Set up container for backtransformed parameters - parms <- numeric(0) - - # Do not transform initial values for state variables - state.ini.optim <- transparms[grep("_0$", names(transparms))] - parms[names(state.ini.optim)] <- state.ini.optim - - # Exponential transformation for rate constants - if(transform_rates) { - trans_k <- transparms[grep("^log_k_", names(transparms))] - trans_k__iore <- transparms[grep("^log_k__iore_", names(transparms))] - trans_k = c(trans_k, trans_k__iore) - if (length(trans_k) > 0) { - k_names <- gsub("^log_k", "k", names(trans_k)) - parms[k_names] <- exp(trans_k) - } - } else { - trans_k <- transparms[grep("^k_", names(transparms))] - parms[names(trans_k)] <- trans_k - trans_k__iore <- transparms[grep("^k__iore_", names(transparms))] - parms[names(trans_k__iore)] <- trans_k__iore - } - - # Do not transform exponents in IORE models - N <- transparms[grep("^N", names(transparms))] - parms[names(N)] <- N - - # Go through state variables and apply inverse isometric logratio transformation - mod_vars = names(spec) - for (box in mod_vars) { - # Get the names as used in the model - f_names = grep(paste("^f", box, sep = "_"), mkinmod$parms, value = TRUE) - # Get the formation fraction parameters - trans_f = transparms[grep(paste("^f", box, sep = "_"), names(transparms))] - if (length(trans_f) > 0) { - if(transform_fractions) { - f <- invilr(trans_f) - if (spec[[box]]$sink) { - parms[f_names] <- f[1:length(f)-1] - } else { - parms[f_names] <- f - } - } else { - parms[names(trans_f)] <- trans_f - } - } - } - - # Transform parameters also for FOMC, DFOP, HS and logistic models - for (pname in c("alpha", "beta", "k1", "k2", "tb", "kmax", "k0", "r")) { - if (transform_rates) { - pname_trans = paste0("log_", pname) - if (!is.na(transparms[pname_trans])) { - parms[pname] <- exp(transparms[pname_trans]) - } - } else { - if (!is.na(transparms[pname])) { - parms[pname] <- transparms[pname] - } - } - } - - # DFOP parameter g is treated as a fraction - if (!is.na(transparms["g_ilr"])) { - g_ilr <- transparms["g_ilr"] - parms["g"] <- invilr(g_ilr)[1] - } - if (!is.na(transparms["g"])) { - parms["g"] <- transparms["g"] - } - - return(parms) -} -# vim: set ts=2 sw=2 expandtab: +#' Functions to transform and backtransform kinetic parameters for fitting +#' +#' The transformations are intended to map parameters that should only take on +#' restricted values to the full scale of real numbers. For kinetic rate +#' constants and other paramters that can only take on positive values, a +#' simple log transformation is used. For compositional parameters, such as the +#' formations fractions that should always sum up to 1 and can not be negative, +#' the \code{\link{ilr}} transformation is used. +#' +#' The transformation of sets of formation fractions is fragile, as it supposes +#' the same ordering of the components in forward and backward transformation. +#' This is no problem for the internal use in \code{\link{mkinfit}}. +#' +#' @aliases transform_odeparms backtransform_odeparms +#' @param parms Parameters of kinetic models as used in the differential +#' equations. +#' @param transparms Transformed parameters of kinetic models as used in the +#' fitting procedure. +#' @param mkinmod The kinetic model of class \code{\link{mkinmod}}, containing +#' the names of the model variables that are needed for grouping the +#' formation fractions before \code{\link{ilr}} transformation, the parameter +#' names and the information if the pathway to sink is included in the model. +#' @param transform_rates Boolean specifying if kinetic rate constants should +#' be transformed in the model specification used in the fitting for better +#' compliance with the assumption of normal distribution of the estimator. If +#' TRUE, also alpha and beta parameters of the FOMC model are +#' log-transformed, as well as k1 and k2 rate constants for the DFOP and HS +#' models and the break point tb of the HS model. +#' @param transform_fractions Boolean specifying if formation fractions +#' constants should be transformed in the model specification used in the +#' fitting for better compliance with the assumption of normal distribution +#' of the estimator. The default (TRUE) is to do transformations. The g +#' parameter of the DFOP and HS models are also transformed, as they can also +#' be seen as compositional data. The transformation used for these +#' transformations is the \code{\link{ilr}} transformation. +#' @return A vector of transformed or backtransformed parameters with the same +#' names as the original parameters. +#' @author Johannes Ranke +#' @examples +#' +#' SFO_SFO <- mkinmod( +#' parent = list(type = "SFO", to = "m1", sink = TRUE), +#' m1 = list(type = "SFO")) +#' # Fit the model to the FOCUS example dataset D using defaults +#' fit <- mkinfit(SFO_SFO, FOCUS_2006_D, quiet = TRUE) +#' fit.s <- summary(fit) +#' # Transformed and backtransformed parameters +#' print(fit.s$par, 3) +#' print(fit.s$bpar, 3) +#' +#' \dontrun{ +#' # Compare to the version without transforming rate parameters +#' fit.2 <- mkinfit(SFO_SFO, FOCUS_2006_D, transform_rates = FALSE, quiet = TRUE) +#' fit.2.s <- summary(fit.2) +#' print(fit.2.s$par, 3) +#' print(fit.2.s$bpar, 3) +#' } +#' +#' initials <- fit$start$value +#' names(initials) <- rownames(fit$start) +#' transformed <- fit$start_transformed$value +#' names(transformed) <- rownames(fit$start_transformed) +#' transform_odeparms(initials, SFO_SFO) +#' backtransform_odeparms(transformed, SFO_SFO) +#' +#' \dontrun{ +#' # The case of formation fractions +#' SFO_SFO.ff <- mkinmod( +#' parent = list(type = "SFO", to = "m1", sink = TRUE), +#' m1 = list(type = "SFO"), +#' use_of_ff = "max") +#' +#' fit.ff <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, quiet = TRUE) +#' fit.ff.s <- summary(fit.ff) +#' print(fit.ff.s$par, 3) +#' print(fit.ff.s$bpar, 3) +#' initials <- c("f_parent_to_m1" = 0.5) +#' transformed <- transform_odeparms(initials, SFO_SFO.ff) +#' backtransform_odeparms(transformed, SFO_SFO.ff) +#' +#' # And without sink +#' SFO_SFO.ff.2 <- mkinmod( +#' parent = list(type = "SFO", to = "m1", sink = FALSE), +#' m1 = list(type = "SFO"), +#' use_of_ff = "max") +#' +#' +#' fit.ff.2 <- mkinfit(SFO_SFO.ff.2, FOCUS_2006_D, quiet = TRUE) +#' fit.ff.2.s <- summary(fit.ff.2) +#' print(fit.ff.2.s$par, 3) +#' print(fit.ff.2.s$bpar, 3) +#' } +#' +#' @export transform_odeparms +transform_odeparms <- function(parms, mkinmod, + transform_rates = TRUE, + transform_fractions = TRUE) +{ + # We need the model specification for the names of the model + # variables and the information on the sink + spec = mkinmod$spec + + # Set up container for transformed parameters + transparms <- numeric(0) + + # Do not transform initial values for state variables + state.ini.optim <- parms[grep("_0$", names(parms))] + transparms[names(state.ini.optim)] <- state.ini.optim + + # Log transformation for rate constants if requested + k <- parms[grep("^k_", names(parms))] + k__iore <- parms[grep("^k__iore_", names(parms))] + k <- c(k, k__iore) + if (length(k) > 0) { + if(transform_rates) { + transparms[paste0("log_", names(k))] <- log(k) + } else transparms[names(k)] <- k + } + + # Do not transform exponents in IORE models + N <- parms[grep("^N", names(parms))] + transparms[names(N)] <- N + + # Go through state variables and apply isometric logratio transformation to + # formation fractions if requested + mod_vars = names(spec) + for (box in mod_vars) { + f <- parms[grep(paste("^f", box, sep = "_"), names(parms))] + + if (length(f) > 0) { + if(transform_fractions) { + if (spec[[box]]$sink) { + trans_f <- ilr(c(f, 1 - sum(f))) + trans_f_names <- paste("f", box, "ilr", 1:length(trans_f), sep = "_") + transparms[trans_f_names] <- trans_f + } else { + if (length(f) > 1) { + trans_f <- ilr(f) + trans_f_names <- paste("f", box, "ilr", 1:length(trans_f), sep = "_") + transparms[trans_f_names] <- trans_f + } + } + } else { + transparms[names(f)] <- f + } + } + } + + # Transform also FOMC parameters alpha and beta, DFOP and HS rates k1 and k2 + # and HS parameter tb as well as logistic model parameters kmax, k0 and r if + # transformation of rates is requested + for (pname in c("alpha", "beta", "k1", "k2", "tb", "kmax", "k0", "r")) { + if (!is.na(parms[pname])) { + if (transform_rates) { + transparms[paste0("log_", pname)] <- log(parms[pname]) + } else { + transparms[pname] <- parms[pname] + } + } + } + + # DFOP parameter g is treated as a fraction + if (!is.na(parms["g"])) { + g <- parms["g"] + if (transform_fractions) { + transparms["g_ilr"] <- ilr(c(g, 1 - g)) + } else { + transparms["g"] <- g + } + } + + return(transparms) +} + +#' @describeIn transform_odeparms Backtransform the set of transformed parameters +#' @export backtransform_odeparms +backtransform_odeparms <- function(transparms, mkinmod, + transform_rates = TRUE, + transform_fractions = TRUE) +{ + # We need the model specification for the names of the model + # variables and the information on the sink + spec = mkinmod$spec + + # Set up container for backtransformed parameters + parms <- numeric(0) + + # Do not transform initial values for state variables + state.ini.optim <- transparms[grep("_0$", names(transparms))] + parms[names(state.ini.optim)] <- state.ini.optim + + # Exponential transformation for rate constants + if(transform_rates) { + trans_k <- transparms[grep("^log_k_", names(transparms))] + trans_k__iore <- transparms[grep("^log_k__iore_", names(transparms))] + trans_k = c(trans_k, trans_k__iore) + if (length(trans_k) > 0) { + k_names <- gsub("^log_k", "k", names(trans_k)) + parms[k_names] <- exp(trans_k) + } + } else { + trans_k <- transparms[grep("^k_", names(transparms))] + parms[names(trans_k)] <- trans_k + trans_k__iore <- transparms[grep("^k__iore_", names(transparms))] + parms[names(trans_k__iore)] <- trans_k__iore + } + + # Do not transform exponents in IORE models + N <- transparms[grep("^N", names(transparms))] + parms[names(N)] <- N + + # Go through state variables and apply inverse isometric logratio transformation + mod_vars = names(spec) + for (box in mod_vars) { + # Get the names as used in the model + f_names = grep(paste("^f", box, sep = "_"), mkinmod$parms, value = TRUE) + # Get the formation fraction parameters + trans_f = transparms[grep(paste("^f", box, sep = "_"), names(transparms))] + if (length(trans_f) > 0) { + if(transform_fractions) { + f <- invilr(trans_f) + if (spec[[box]]$sink) { + parms[f_names] <- f[1:length(f)-1] + } else { + parms[f_names] <- f + } + } else { + parms[names(trans_f)] <- trans_f + } + } + } + + # Transform parameters also for FOMC, DFOP, HS and logistic models + for (pname in c("alpha", "beta", "k1", "k2", "tb", "kmax", "k0", "r")) { + if (transform_rates) { + pname_trans = paste0("log_", pname) + if (!is.na(transparms[pname_trans])) { + parms[pname] <- exp(transparms[pname_trans]) + } + } else { + if (!is.na(transparms[pname])) { + parms[pname] <- transparms[pname] + } + } + } + + # DFOP parameter g is treated as a fraction + if (!is.na(transparms["g_ilr"])) { + g_ilr <- transparms["g_ilr"] + parms["g"] <- invilr(g_ilr)[1] + } + if (!is.na(transparms["g"])) { + parms["g"] <- transparms["g"] + } + + return(parms) +} +# vim: set ts=2 sw=2 expandtab: -- cgit v1.2.1