From 5f4a25fad9a5323611855145e6b31267b3ed9e50 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Fri, 24 Jun 2016 17:42:42 +0200 Subject: Convert main vignette to Rmd/html, add_err(), fixes --- DESCRIPTION | 2 +- GNUmakefile | 2 +- NEWS.md | 14 ++- R/add_err.R | 48 ++++++++ R/plot.mmkin.R | 15 +-- man/add_err.Rd | 98 +++++++++++++++ man/plot.mmkin.Rd | 5 + vignettes/mkin.Rmd | 167 ++++++++++++++++++++++++++ vignettes/mkin.Rnw | 161 ------------------------- vignettes/mkin.html | 301 +++++++++++++++++++++++++++++++++++++++++++++++ vignettes/mkin.pdf | Bin 160263 -> 0 bytes vignettes/references.bib | 22 +++- 12 files changed, 655 insertions(+), 180 deletions(-) create mode 100644 R/add_err.R create mode 100644 man/add_err.Rd create mode 100644 vignettes/mkin.Rmd delete mode 100644 vignettes/mkin.Rnw create mode 100644 vignettes/mkin.html delete mode 100644 vignettes/mkin.pdf diff --git a/DESCRIPTION b/DESCRIPTION index bc35262d..19173784 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: mkin Type: Package Title: Kinetic evaluation of chemical degradation data Version: 0.9.42.9000 -Date: 2016-06-17 +Date: 2016-06-24 Authors@R: c(person("Johannes", "Ranke", role = c("aut", "cre", "cph"), email = "jranke@uni-bremen.de"), person("Katrin", "Lindenberger", role = "ctb"), diff --git a/GNUmakefile b/GNUmakefile index 40be37c9..cfa7af67 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -85,7 +85,7 @@ vignettes/%.pdf: vignettes/header.tex vignettes/references.bib vignettes/%.Rnw vignettes/%.html: vignettes/mkin_vignettes.css vignettes/%.Rmd "$(RBIN)/Rscript" -e "tools::buildVignette(file = 'vignettes/$*.Rmd', dir = 'vignettes')" -vignettes: vignettes/mkin.pdf vignettes/FOCUS_D.html vignettes/FOCUS_L.html vignettes/FOCUS_Z.pdf vignettes/compiled_models.html +vignettes: vignettes/mkin.html vignettes/FOCUS_D.html vignettes/FOCUS_L.html vignettes/FOCUS_Z.pdf vignettes/compiled_models.html sd: @echo Now execute diff --git a/NEWS.md b/NEWS.md index bc593f98..6669fdb2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,22 +6,28 @@ - The title was changed to `Kinetic evaluations of chemical degradation data` -- `mkinfit`: Do not error out in cases where the fit converges, but the Jacobian for the untransformed model cost can not be estimated. Give a warning instead and return NA for the t-test results. - -- `summary.mkinfit`: Give a warning message when the covariance matrix can not be obtained. +- The main vignette `mkin` was converted to R markdown and updated -- A test has been added to containing a corresponding edge case to check that the warnings are correctly issued and the fit does not terminate. +- The function `add_err` was added to the package, making it easy to generate simulated data using an error model based on the normal distribution ### Minor changes - Remove an outdated reference to the inline package in the `compiled_models` vignette +- `mkinfit`: Do not error out in cases where the fit converges, but the Jacobian for the untransformed model cost can not be estimated. Give a warning instead and return NA for the t-test results. + +- `summary.mkinfit`: Give a warning message when the covariance matrix can not be obtained. + +- A test has been added to containing a corresponding edge case to check that the warnings are correctly issued and the fit does not terminate. + ### Bug fixes - `endpoints`: When the name of a substance degrading to a metabolite (e.g. a parent compound) used in the model formulation ended in the letter `f`, some rate parameters could be listed as formation fractions with mixed up names. These would also appear in the summary. - `mkinfit`: Check for all observed variables when checking if the user tried to fix formation fractions when fitting them using ilr transformation. +- `plot.mmkin`: Removed some leftover code that set the plot margins wrongly in the case of a single fit to be plotted, so the main title was misplaced + ## mkin 0.9.42 (2016-03-25) ### Major changes diff --git a/R/add_err.R b/R/add_err.R new file mode 100644 index 00000000..0995e634 --- /dev/null +++ b/R/add_err.R @@ -0,0 +1,48 @@ +# Copyright (C) 2015-2016 Johannes Ranke +# Contact: jranke@uni-bremen.de + +# This file is part of the R package mkin + +# mkin is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. + +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. + +# You should have received a copy of the GNU General Public License along with +# this program. If not, see + +add_err = function(prediction, sdfunc, + n = 1000, LOD = 0.1, reps = 2, + digits = 1, seed = NA) +{ + if (!is.na(seed)) set.seed(seed) + + # The output of mkinpredict is in wide format + d_long = mkin_wide_to_long(prediction, time = "time") + + # Set up the list to be returned + d_return = list() + + # Generate datasets one by one in a loop + for (i in 1:n) { + d_rep = data.frame(lapply(d_long, rep, each = 2)) + d_rep$value = rnorm(length(d_rep$value), d_rep$value, sdfunc(d_rep$value)) + + d_rep[d_rep$time == 0 & d_rep$name %in% c("M1", "M2"), "value"] <- 0 + + # Set values below the LOD to NA + d_NA <- transform(d_rep, value = ifelse(value < LOD, NA, value)) + + # Round the values for convenience + d_NA$value <- round(d_NA$value, digits) + + d_return[[i]] <- d_NA + } + + return(d_return) +} diff --git a/R/plot.mmkin.R b/R/plot.mmkin.R index 02bf4d6e..7b54be4b 100644 --- a/R/plot.mmkin.R +++ b/R/plot.mmkin.R @@ -20,12 +20,17 @@ plot.mmkin <- function(x, main = "auto", legends = 1, errmin_var = "All data", e cex = 0.7, rel.height.middle = 0.9, ...) { n.m <- nrow(x) n.d <- ncol(x) + + # We can handle either a row (different models, same dataset) + # or a column (same model, different datasets) if (n.m > 1 & n.d > 1) stop("Please select fits either for one model or for one dataset") if (n.m == 1 & n.d == 1) loop_over = "none" if (n.m > 1) loop_over <- "models" if (n.d > 1) loop_over <- "datasets" n.fits <- length(x) + # Set the main plot titles from the names of the models or the datasets + # Will be integer indexes if no other names are present in the mmkin object if (main == "auto") { main = switch(loop_over, none = paste(rownames(x), colnames(x)), @@ -34,11 +39,13 @@ plot.mmkin <- function(x, main = "auto", legends = 1, errmin_var = "All data", e } oldpar <- par(no.readonly = TRUE) + + # Set relative plot heights, so the first and the last plot are the norm + # and the middle plots (if n.fits >2) are smaller by rel.height.middle rel.heights <- if (n.fits > 2) c(1, rep(rel.height.middle, n.fits - 2), 1) else rep(1, n.fits) layout(matrix(1:(2 * n.fits), n.fits, 2, byrow = TRUE), heights = rel.heights) - #par(mfrow = c(n.fits, 2)) par(mar = c(3.0, 4.1, 4.1, 2.1)) # Reduce bottom margin by 2.1 - hides x axis legend par(cex = cex) @@ -48,12 +55,6 @@ plot.mmkin <- function(x, main = "auto", legends = 1, errmin_var = "All data", e # reduced plot height, therefore we need rel.height.middle in the layout par(mar = c(3.0, 4.1, 2.1, 2.1)) } - if (i.fit == n.fits) { - # Reduce top margin by 2 after the first plot as we have no main title, - # plot height remains about constant - par(mar = c(5.1, 4.1, 2.1, 2.1)) - - } fit <- x[[i.fit]] plot(fit, legend = legends == i.fit, ...) diff --git a/man/add_err.Rd b/man/add_err.Rd new file mode 100644 index 00000000..a0a5106f --- /dev/null +++ b/man/add_err.Rd @@ -0,0 +1,98 @@ +\name{add_err} +\alias{add_err} +\title{ + Add normally distributed errors to simulated kinetic degradation data +} +\description{ + Normally distributed errors are added to data predicted for a specific + degradation model using \code{\link{mkinpredict}}. The variance of the error + may depend on the predicted value and is specified as a standard deviation. +} +\usage{ + add_err(prediction, sdfunc, + n = 1000, LOD = 0.1, reps = 2, + digits = 1, seed = NA) +} +\arguments{ + \item{prediction}{ + A prediction from a kinetic model as produced by \code{\link{mkinpredict}}. + } + \item{sdfunc}{ + A function taking the predicted value as its only argument and returning + a standard deviation that should be used for generating the random error + terms for this value. + } + \item{n}{ + The number of datasets to be generated. + } + \item{LOD}{ + The limit of detection (LOD). Values that are below the LOD after adding + the random error will be set to NA. + } + \item{reps}{ + The number of replicates to be generated within the datasets. + } + \item{digits}{ + The number of digits to which the values will be rounded. + } + \item{seed}{ + The seed used for the generation of random numbers. If NA, the seed + is not set. + } +} +\value{ + A list of datasets compatible with \code{\link{mmkin}}, i.e. + the components of the list are datasets compatible with + \code{\link{mkinfit}}. +} +\references{ + Ranke J and Lehmann R (2015) To t-test or not to t-test, that is the question. XV Symposium on Pesticide Chemistry 2-4 September 2015, Piacenza, Italy + http://chem.uft.uni-bremen.de/ranke/posters/piacenza_2015.pdf +} +\author{ + Johannes Ranke +} +\examples{ +# The kinetic model +m_SFO_SFO <- mkinmod(parent = mkinsub("SFO", "M1"), + M1 = mkinsub("SFO"), use_of_ff = "max") + +# Generate a prediction for a specific set of parameters +sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) + +# This is the prediction used for the "Type 2 datasets" on the Piacenza poster +# from 2015 +d_SFO_SFO <- mkinpredict(m_SFO_SFO, + c(k_parent = 0.1, f_parent_to_M1 = 0.5, + k_M1 = log(2)/1000), + c(parent = 100, M1 = 0), + sampling_times) + +# Add an error term with a constant (independent of the value) standard deviation +# of 10, and generate three datasets +d_SFO_SFO_err <- add_err(d_SFO_SFO, function(x) 10, n = 3, seed = 123456789 ) + +# Name the datasets for nicer plotting +names(d_SFO_SFO_err) <- paste("Dataset", 1:3) + +# Name the model in the list of models (with only one member in this case) +# for nicer plotting later on +# Be quiet and use the faster Levenberg-Marquardt algorithm, as the datasets +# are easy and examples are run often +f_SFO_SFO <- mmkin(list("SFO-SFO" = m_SFO_SFO), + d_SFO_SFO_err, + quiet = TRUE, method.modFit = "Marq") + +plot(f_SFO_SFO) + +# We would like to inspect the fit for dataset 3 more closely +# Using double brackets makes the returned object an mkinfit object +# instead of a list of mkinfit objects, so plot.mkinfit is used +plot(f_SFO_SFO[[3]], show_residuals = TRUE) + +# If we use single brackets, we should give two indices (model and dataset), +# and plot.mmkin is used +plot(f_SFO_SFO[1, 3]) + +} +\keyword{ manip } diff --git a/man/plot.mmkin.Rd b/man/plot.mmkin.Rd index 761a4cd5..8362f16c 100644 --- a/man/plot.mmkin.Rd +++ b/man/plot.mmkin.Rd @@ -50,4 +50,9 @@ cores = 1, quiet = TRUE) plot(fits[, "FOCUS C"]) plot(fits["FOMC", ]) + + # We can also plot a single fit, if we like the way mmkin works, but then the plot + # height should be smaller than the plot width (this is not possible for the html pages + # generated by staticdocs, as far as I know). + plot(fits["FOMC", "FOCUS C"]) # same as plot(fits[1, 2]) } diff --git a/vignettes/mkin.Rmd b/vignettes/mkin.Rmd new file mode 100644 index 00000000..63d97c6b --- /dev/null +++ b/vignettes/mkin.Rmd @@ -0,0 +1,167 @@ +--- +title: mkin - Kinetic evaluation of chemical degradation data +author: Johannes Ranke +date: "`r Sys.Date()`" +output: + html_document: + toc: true + toc_float: true +bibliography: references.bib +vignette: > + %\VignetteEngine{knitr::rmarkdown} + %\VignetteIndexEntry{mkin - Kinetic evaluation of chemical degradation data} + \usepackage[utf8]{inputenc} +--- + +[Wissenschaftlicher Berater, Kronacher Str. 8, 79639 Grenzach-Wyhlen, Germany](http://www.jrwb.de)
+[Privatdozent at the University of Bremen](http://chem.uft.uni-bremen.de/ranke) + +```{r, include = FALSE} +require(knitr) +opts_chunk$set(engine='R', tidy=FALSE) +``` + +# Abstract +In the regulatory evaluation of chemical substances like plant protection +products (pesticides), biocides and other chemicals, degradation data play an +important role. For the evaluation of pesticide degradation experiments, +detailed guidance has been developed, based on nonlinear optimisation. +The `R` add-on package `mkin` [@pkg:mkin] implements fitting some of the models +recommended in this guidance from within R and calculates some statistical +measures for data series within one or more compartments, for parent and +metabolites. + +# Background + +Many approaches are possible regarding the evaluation of chemical degradation +data. + +The now deprecated `kinfit` package [@pkg:kinfit] in `R` [@rcore2016] +implements the approach recommended in the kinetics report provided by the +FOrum for Co-ordination of pesticide fate models and their USe [@FOCUS2006; +FOCUSkinetics2014] for simple data series for one parent compound in one +compartment. + +The `mkin` package [@pkg:mkin] extends this approach to data series with +transformation products, commonly termed metabolites, and to more than one +compartment. It is also possible to include back reactions, so equilibrium +reactions and equilibrium partitioning can be specified, although this +oftentimes leads to an overparameterisation of the model. + +When mkin was first published, the most commonly used tools for +fitting more complex kinetic degradation models to experimental data were +KinGUI [@schaefer2007], a MATLAB based tool with a graphical user +interface that was specifically tailored to the task and included some output +as proposed by the FOCUS Kinetics Workgroup, and ModelMaker, a general purpose +compartment based tool providing infrastructure for fitting dynamic simulation +models based on differential equations to data. + +The code was first uploaded to the BerliOS platform. When this was taken down, +the version control history was imported into the R-Forge site, where the code +is still mirrored today (see *e.g.* +[the initial commit on 11 May 2010](http://cgit.jrwb.de/mkin/commit/?id=30cbb4092f6d2d3beff5800603374a0d009ad770). + +At that time, the R package `FME` (Flexible Modelling Environment) +[@soetaert2010] was already available, and provided a good basis for +developing a package specifically tailored to the task. The remaining challenge +was to make it as easy as possible for the users (including the author of this +vignette) to specify the system of differential equations and to include the +output requested by the FOCUS guidance, such as the relative standard deviation +that has to be assumed for the residuals, such that the $\chi^2$ +goodness-of-fit test as defined by the FOCUS kinetics workgroup would pass +using an significance level $\alpha$ of 0.05. + +Also, mkin introduced using analytical solutions for parent only kinetics for +improved optimization speed. Later, Eigenvalue based solutions were +introduced to mkin for the case of linear differential equations (*i.e.* +where the FOMC or DFOP models were not used for the parent compound), greatly +improving the optimization speed for these cases. + +The possibility to specify back-reactions and a biphasic model (SFORB) for +metabolites were present in mkin from the very beginning. + +## Derived software tools + +Soon after the publication of mkin, two derived tools were published, namely +KinGUII (available from Bayer Crop Science) and CAKE (commissioned to Tessella +by Syngenta), which added a graphical user interface (GUI), and added fitting by +iteratively reweighted least squares (IRLS) and characterisation of likely +parameter distributions by Markov Chain Monte Carlo (MCMC) sampling. + +CAKE focuses on a smooth use experience, sacrificing some flexibility in the model +definition, originally allowing only two primary metabolites in parallel. +The current version 3.2 of CAKE release in March 2016 uses a basic scheme for +up to six metabolites in a flexible arrangement. + +KinGUI offers quite an even more flexible widget for specifying complex kinetic +models. Back-reactions (non-instanteneous equilibria) were not present in the +first version of KinGUII, and only simple first-order models could be specified +for transformation products. Later, starting from KinGUII version 2.1 published in ), +back-reactions and biphasic modelling of metabolites were also available in +KinGUII. + +A further graphical user interface (GUI) that has recently been brought to a decent +degree of maturity is the browser based GUI named `gmkin`. Please see its +[documentation page](http://kinfit.r-forge.r-project.org/gmkin_static) and +[manual](http://kinfit.r-forge.r-project.org/gmkin_static/vignettes/gmkin_manual.html) +for further information. + +## Recent developments + +Currently (June 2016), the main features available in `mkin` which are +not present in KinGUII or CAKE, are the speed increase by using compiled code when +a compiler is present, parallel model fitting on multicore machines using the +`mmkin` function, and the estimation of parameter confidence intervals based on +transformed parameters. These are explained in more detail below. + +# Internal parameter transformations + +For rate constants, the log +transformation is used, as proposed by Bates and Watts [-@bates1988, p. 77, +149]. Approximate intervals are constructed for the transformed rate +constants [compare @bates1988, p. 135], *i.e.* for their logarithms. +Confidence intervals for the rate constants are then obtained using the +appropriate backtransformation using the exponential function. + +In the first version of `mkin` allowing for specifying models using +formation fractions, a home-made reparameterisation was used in order to ensure +that the sum of formation fractions would not exceed unity. + +This method is still used in the current version of KinGUII (v2.1 from April +2014), with a modification that allows for fixing the pathway to sink to zero. +CAKE uses penalties in the objective function in order to enforce this +constraint. + +In 2012, an alternative reparameterisation of the formation fractions was +proposed together with René Lehmann \citep{ranke2012}, based on isometric +logratio transformation (ILR). The aim was to improve the validity of the +linear approximation of the objective function during the parameter +estimation procedure as well as in the subsequent calculation of parameter +confidence intervals. + +In the first attempt at providing improved parameter confidence intervals +introduced to \Rpackage{mkin} in 2013, confidence intervals obtained from +FME on the transformed parameters were simply all backtransformed one by one +to yield asymetric confidence intervals for the backtransformed parameters. + +However, while there is a 1:1 relation between the rate constants in the model +and the transformed parameters fitted in the model, the parameters obtained by the +isometric logratio transformation are calculated from the set of formation +fractions that quantify the paths to each of the compounds formed from a +specific parent compound, and no such 1:1 relation exists. + +Therefore, parameter confidence intervals for formation fractions obtained with +this method only appear valid for the case of a single transformation product, where +only one formation fraction is to be estimated, directly corresponding to one +component of the ilr transformed parameter. + +The confidence intervals obtained by backtransformation for the cases where a +1:1 relation between transformed and original parameter exist are considered by +the author of this vignette to be more accurate than those obtained using a +re-estimation of the Hessian matrix after backtransformation, as implemented +in the FME package. + + +# References + + diff --git a/vignettes/mkin.Rnw b/vignettes/mkin.Rnw deleted file mode 100644 index 1befe009..00000000 --- a/vignettes/mkin.Rnw +++ /dev/null @@ -1,161 +0,0 @@ -%\VignetteIndexEntry{Routines for fitting kinetic models with one or more state variables to chemical degradation data} -%\VignetteEngine{knitr::knitr} -\documentclass[12pt,a4paper]{article} -\usepackage{a4wide} -\usepackage[utf8]{inputenc} -\input{header} -\hypersetup{ - pdftitle = {mkin - Routines for fitting kinetic models with one or more state variables to chemical degradation data}, - pdfsubject = {Manuscript}, - pdfauthor = {Johannes Ranke}, - colorlinks = {true}, - linkcolor = {blue}, - citecolor = {blue}, - urlcolor = {red}, - linktocpage = {true}, -} - -\begin{document} - -<>= -require(knitr) -opts_chunk$set(engine='R', tidy=FALSE) -@ - -\title{mkin -\\ -Routines for fitting kinetic models with one or more state variables to -chemical degradation data} -\author{\textbf{Johannes Ranke} \\[0.5cm] -%EndAName -Wissenschaftlicher Berater\\ -Kronacher Str. 8, 79639 Grenzach-Wyhlen, Germany\\[0.5cm] -and\\[0.5cm] -Privatdozent at the University of Bremen\\ -} -\maketitle - -\begin{abstract} -In the regulatory evaluation of chemical substances like plant protection -products (pesticides), biocides and other chemicals, degradation data play an -important role. For the evaluation of pesticide degradation experiments, -detailed guidance has been developed, based on nonlinear optimisation. -The \RR{} add-on package \Rpackage{mkin} implements fitting some of the models -recommended in this guidance from within R and calculates some statistical -measures for data series within one or more compartments, for parent and -metabolites. -\end{abstract} - - -\thispagestyle{empty} \setcounter{page}{0} - -\clearpage - -\tableofcontents - -\textbf{Key words}: Kinetics, FOCUS, nonlinear optimisation - -\section{Introduction} -\label{intro} - -Many approaches are possible regarding the evaluation of chemical degradation -data. The \Rpackage{kinfit} package \citep{pkg:kinfit} in \RR{} -\citep{rcore2014} implements the approach recommended in the kinetics report -provided by the FOrum for Co-ordination of pesticide fate models and their -USe \citep{FOCUS2006, FOCUSkinetics2011} for simple data series for one parent -compound in one compartment. - -The \Rpackage{mkin} package \citep{pkg:mkin} extends this approach to data series -with transformation products, commonly termed metabolites, and to more than one -compartment. It is also possible to include back reactions, so equilibrium reactions -and equilibrium partitioning can be specified, although this oftentimes leads -to an overparameterisation of the model. - -When mkin was first published in May 2010, the most commonly used tools -for fitting more complex kinetic degradation models to experimental data were KinGUI -\citep{schaefer2007}, a MATLAB$^\circledR$ based tool with a graphical user -interface that was specifically tailored to the task and included some output -as proposed by the FOCUS Kinetics Workgroup, and ModelMaker, a general purpose -compartment based tool providing infrastructure for fitting dynamic simulation -models based on differential equations to data. - -At that time, the R package \Rpackage{FME} (Flexible Modelling Environment) -\citep{soetaert2010} was already available, and provided a good basis for -developing a package specifically tailored to the task. The remaining challenge -was to make it as easy as possible for the users (including the author of this -vignette) to specify the system of differential equations and to include the -output requested by the FOCUS guidance, such as the relative standard deviation -that has to be assumed for the residuals, such that the $\chi^2$ -goodness-of-fit test as defined by the FOCUS kinetics workgroup would pass -using an significance level $\alpha$ of 0.05. - -Also, mkin introduced using analytical solutions for parent only kinetics for -improved optimization speed. Later, Eigenvalue based solutions were -introduced to mkin for the case of linear differential equations (\textit{i.e.} -where the FOMC or DFOP models were not used for the parent compound), greatly -improving the optimization speed for these cases. - -Soon after the publication of mkin, two derived tools were published, namely -KinGUII (available from Bayer Crop Science) and CAKE (commissioned to Tessella -by Syngenta), which added a graphical user interface (GUI), and added fitting by -iteratively reweighted least squares (IRLS) and characterisation of likely -parameter distributions by Markov Chain Monte Carlo (MCMC) sampling. - -CAKE focuses on a smooth use experience, sacrificing some flexibility in the model -definition, allowing only two primary metabolites in parallel. KinGUI offers -quite a flexible widget for specifying complex kinetic models. Back-reactions -(non-instanteneous equilibria) were not present in the first version of -KinGUII, and only simple first-order models could be specified for -transformation products. As of May 2014 (KinGUII version 2.1), back-reactions -and biphasic modelling of metabolites are also available in KinGUII. - -Currently (May 2014), the main feature available in \Rpackage{mkin} which is -not present in KinGUII or CAKE, is the estimation of parameter confidence -intervals based on transformed parameters. For rate constants, the log -transformation is used, as proposed by Bates and Watts \citep[p. 77, p. -149]{bates1988}. Approximate intervals are constructed for the transformed rate -constants \citep[compare][p. 153]{bates1988}, \textit{i.e.} for their logarithms. -Confidence intervals for the rate constants are then obtained using the -appropriate backtransformation using the exponential function. - -In the first version of \Rpackage{mkin} allowing for specifying models using -formation fractions, a home-made reparameterisation was used in order to ensure -that the sum of formation fractions would not exceed unity. - -This method is still used in the current version of KinGUII (v2.1), with a -modification that allows for fixing the pathway to sink to zero. CAKE uses -penalties in the objective function in order to enforce this constraint. - -In 2012, an alternative reparameterisation of the formation fractions was -proposed together with René Lehmann \citep{ranke2012}, based on isometric -logratio transformation (ILR). The aim was to improve the validity of the -linear approximation of the objective function during the parameter -estimation procedure as well as in the subsequent calculation of parameter -confidence intervals. - -In the first attempt at providing improved parameter confidence intervals -introduced to \Rpackage{mkin} in 2013, confidence intervals obtained from -FME on the transformed parameters were simply all backtransformed one by one -to yield asymetric confidence intervals for the backtransformed parameters. - -However, while there is a 1:1 relation between the rate constants in the model -and the transformed parameters fitted in the model, the parameters obtained by the -isometric logratio transformation are calculated from the set of formation -fractions that quantify the paths to each of the compounds formed from a -specific parent compound, and no such 1:1 relation exists. - -Therefore, parameter confidence intervals for formation fractions obtained with -this method only appear valid for the case of a single transformation product, where -only one formation fraction is to be estimated, directly corresponding to one -component of the ilr transformed parameter. - -The confidence intervals obtained by backtransformation for the cases where a -1:1 relation between transformed and original parameter exist are considered by -the author of this vignette to be more accurate than those obtained using a -re-estimation of the Hessian matrix after backtransformation, as implemented -in the FME package. - -\bibliographystyle{plainnat} -\bibliography{references} - -\end{document} -% vim: set foldmethod=syntax: diff --git a/vignettes/mkin.html b/vignettes/mkin.html new file mode 100644 index 00000000..3d76697a --- /dev/null +++ b/vignettes/mkin.html @@ -0,0 +1,301 @@ + + + + + + + + + + + + + + + +mkin - Kinetic evaluation of chemical degradation data + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+
+
+
+
+ +
+ + + + + + + +

Wissenschaftlicher Berater, Kronacher Str. 8, 79639 Grenzach-Wyhlen, Germany
Privatdozent at the University of Bremen

+
+

Abstract

+

In the regulatory evaluation of chemical substances like plant protection products (pesticides), biocides and other chemicals, degradation data play an important role. For the evaluation of pesticide degradation experiments, detailed guidance has been developed, based on nonlinear optimisation. The R add-on package mkin (Ranke 2016) implements fitting some of the models recommended in this guidance from within R and calculates some statistical measures for data series within one or more compartments, for parent and metabolites.

+
+
+

Background

+

Many approaches are possible regarding the evaluation of chemical degradation data.

+

The now deprecated kinfit package (Ranke 2015) in R (R Development Core Team 2016) implements the approach recommended in the kinetics report provided by the FOrum for Co-ordination of pesticide fate models and their USe [FOCUS Work Group on Degradation Kinetics (2006); FOCUSkinetics2014] for simple data series for one parent compound in one compartment.

+

The mkin package (Ranke 2016) extends this approach to data series with transformation products, commonly termed metabolites, and to more than one compartment. It is also possible to include back reactions, so equilibrium reactions and equilibrium partitioning can be specified, although this oftentimes leads to an overparameterisation of the model.

+

When mkin was first published, the most commonly used tools for fitting more complex kinetic degradation models to experimental data were KinGUI (Schäfer et al. 2007), a MATLAB based tool with a graphical user interface that was specifically tailored to the task and included some output as proposed by the FOCUS Kinetics Workgroup, and ModelMaker, a general purpose compartment based tool providing infrastructure for fitting dynamic simulation models based on differential equations to data.

+

The code was first uploaded to the BerliOS platform. When this was taken down, the version control history was imported into the R-Forge site, where the code is still mirrored today (see e.g. the initial commit on 11 May 2010.

+

At that time, the R package FME (Flexible Modelling Environment) (Soetaert and Petzoldt 2010) was already available, and provided a good basis for developing a package specifically tailored to the task. The remaining challenge was to make it as easy as possible for the users (including the author of this vignette) to specify the system of differential equations and to include the output requested by the FOCUS guidance, such as the relative standard deviation that has to be assumed for the residuals, such that the \(\chi^2\) goodness-of-fit test as defined by the FOCUS kinetics workgroup would pass using an significance level \(\alpha\) of 0.05.

+

Also, mkin introduced using analytical solutions for parent only kinetics for improved optimization speed. Later, Eigenvalue based solutions were introduced to mkin for the case of linear differential equations (i.e. where the FOMC or DFOP models were not used for the parent compound), greatly improving the optimization speed for these cases.

+

The possibility to specify back-reactions and a biphasic model (SFORB) for metabolites were present in mkin from the very beginning.

+
+

Derived software tools

+

Soon after the publication of mkin, two derived tools were published, namely KinGUII (available from Bayer Crop Science) and CAKE (commissioned to Tessella by Syngenta), which added a graphical user interface (GUI), and added fitting by iteratively reweighted least squares (IRLS) and characterisation of likely parameter distributions by Markov Chain Monte Carlo (MCMC) sampling.

+

CAKE focuses on a smooth use experience, sacrificing some flexibility in the model definition, originally allowing only two primary metabolites in parallel. The current version 3.2 of CAKE release in March 2016 uses a basic scheme for up to six metabolites in a flexible arrangement.

+

KinGUI offers quite an even more flexible widget for specifying complex kinetic models. Back-reactions (non-instanteneous equilibria) were not present in the first version of KinGUII, and only simple first-order models could be specified for transformation products. Later, starting from KinGUII version 2.1 published in ), back-reactions and biphasic modelling of metabolites were also available in KinGUII.

+

A further graphical user interface (GUI) that has recently been brought to a decent degree of maturity is the browser based GUI named gmkin. Please see its documentation page and manual for further information.

+
+
+

Recent developments

+

Currently (June 2016), the main features available in mkin which are not present in KinGUII or CAKE, are the speed increase by using compiled code when a compiler is present, parallel model fitting on multicore machines using the mmkin function, and the estimation of parameter confidence intervals based on transformed parameters. These are explained in more detail below.

+
+
+
+

Internal parameter transformations

+

For rate constants, the log transformation is used, as proposed by Bates and Watts (1988, 77, 149). Approximate intervals are constructed for the transformed rate constants (compare Bates and Watts 1988, 135), i.e. for their logarithms. Confidence intervals for the rate constants are then obtained using the appropriate backtransformation using the exponential function.

+

In the first version of mkin allowing for specifying models using formation fractions, a home-made reparameterisation was used in order to ensure that the sum of formation fractions would not exceed unity.

+

This method is still used in the current version of KinGUII (v2.1 from April 2014), with a modification that allows for fixing the pathway to sink to zero. CAKE uses penalties in the objective function in order to enforce this constraint.

+

In 2012, an alternative reparameterisation of the formation fractions was proposed together with René Lehmann , based on isometric logratio transformation (ILR). The aim was to improve the validity of the linear approximation of the objective function during the parameter estimation procedure as well as in the subsequent calculation of parameter confidence intervals.

+

In the first attempt at providing improved parameter confidence intervals introduced to in 2013, confidence intervals obtained from FME on the transformed parameters were simply all backtransformed one by one to yield asymetric confidence intervals for the backtransformed parameters.

+

However, while there is a 1:1 relation between the rate constants in the model and the transformed parameters fitted in the model, the parameters obtained by the isometric logratio transformation are calculated from the set of formation fractions that quantify the paths to each of the compounds formed from a specific parent compound, and no such 1:1 relation exists.

+

Therefore, parameter confidence intervals for formation fractions obtained with this method only appear valid for the case of a single transformation product, where only one formation fraction is to be estimated, directly corresponding to one component of the ilr transformed parameter.

+

The confidence intervals obtained by backtransformation for the cases where a 1:1 relation between transformed and original parameter exist are considered by the author of this vignette to be more accurate than those obtained using a re-estimation of the Hessian matrix after backtransformation, as implemented in the FME package.

+
+
+

References

+ +
+

Bates, D., and D. Watts. 1988. Nonlinear Regression and Its Applications. Wiley-Interscience.

+

FOCUS Work Group on Degradation Kinetics. 2006. Guidance Document on Estimating Persistence and Degradation Kinetics from Environmental Fate Studies on Pesticides in EU Registration. Report of the FOCUS Work Group on Degradation Kinetics. http://focus.jrc.ec.europa.eu/dk.

+

R Development Core Team. 2016. R: A Language and Environment for Statistical Computing. Vienna, Austria: R Foundation for Statistical Computing. http://www.R-project.org.

+

Ranke, Johannes. 2015. Kinfit: Routines for Fitting Simple Kinetic Models to Chemical Degradation Data. http://CRAN.R-project.org/package=kinfit.

+

———. 2016. `mkin`: Kinetic Evaluation of Chemical Degradation Data. http://CRAN.R-project.org/package=mkin.

+

Schäfer, D., B. Mikolasch, P. Rainbird, and B. Harvey. 2007. “KinGUI: a New Kinetic Software Tool for Evaluations According to FOCUS Degradation Kinetics.” In Proceedings of the XIII Symposium Pesticide Chemistry, edited by Del Re A. A. M., Capri E., Fragoulis G., and Trevisan M., 916–23. Piacenza.

+

Soetaert, Karline, and Thomas Petzoldt. 2010. “Inverse Modelling, Sensitivity and Monte Carlo Analysis in R Using Package FME.” Journal of Statistical Software 33 (3): 1–28. http://www.jstatsoft.org/v33/i03/.

+
+
+ + + +
+
+ +
+ + + + + + + + diff --git a/vignettes/mkin.pdf b/vignettes/mkin.pdf deleted file mode 100644 index 4e3863d6..00000000 Binary files a/vignettes/mkin.pdf and /dev/null differ diff --git a/vignettes/references.bib b/vignettes/references.bib index 88caea60..a9b39725 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -19,6 +19,17 @@ url = {http://focus.jrc.ec.europa.eu/dk} } +@MANUAL{FOCUSkinetics2014, + title = {Generic guidance for estimating persistence and degradation kinetics + from environmental fate studies on pesticides in EU registration}, + author = {{FOCUS Work Group on Degradation Kinetics}}, + edition = {1.1}, + month = {December}, + year = {2014}, + file = {FOCUS kinetics 2011 Generic guidance:/home/ranke/dok/orgs/focus/FOCUSkineticsvc_1_0_Nov23.pdf:PDF}, + url = {http://focus.jrc.ec.europa.eu/dk} +} + @MANUAL{FOCUS2006, title = {Guidance Document on Estimating Persistence and Degradation Kinetics from Environmental Fate Studies on Pesticides in EU Registration. @@ -29,12 +40,12 @@ url = {http://focus.jrc.ec.europa.eu/dk} } -@MANUAL{rcore2014, +@MANUAL{rcore2016, title = {\textsf{R}: A Language and Environment for Statistical Computing}, author = {{R Development Core Team}}, organization = {R Foundation for Statistical Computing}, address = {Vienna, Austria}, - year = {2014}, + year = {2016}, note = {{ISBN} 3-900051-07-0}, url = {http://www.R-project.org} } @@ -43,15 +54,14 @@ title = {kinfit: {R}outines for fitting simple kinetic models to chemical degradation data}, author = {Johannes Ranke}, - year = {2013}, + year = {2015}, url = {http://CRAN.R-project.org/package=kinfit} } @MANUAL{pkg:mkin, - title = {mkin: {R}outines for fitting kinetic models with one or more state - variables to chemical degradation data}, + title = {`mkin`: {K}inetic evaluation of chemical degradation data}, author = {Johannes Ranke}, - year = {2013}, + year = {2016}, url = {http://CRAN.R-project.org/package=mkin} } -- cgit v1.2.1