From 218a9c55bd80fb708b15fa7196422f759bfe4b27 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 13 May 2020 16:20:23 +0200 Subject: Further formatting improvement of benchmark vignette Also, use .rmd extension instead of .Rmd for vignettes. --- GNUmakefile | 10 +- docs/articles/web_only/benchmarks.html | 146 ++++++++---------- docs/pkgdown.yml | 2 +- vignettes/FOCUS_D.Rmd | 73 --------- vignettes/FOCUS_D.rmd | 73 +++++++++ vignettes/FOCUS_L.Rmd | 271 --------------------------------- vignettes/FOCUS_L.rmd | 271 +++++++++++++++++++++++++++++++++ vignettes/mkin.Rmd | 238 ----------------------------- vignettes/mkin.rmd | 238 +++++++++++++++++++++++++++++ vignettes/twa.Rmd | 79 ---------- vignettes/twa.rmd | 79 ++++++++++ vignettes/web_only/FOCUS_Z.Rmd | 255 ------------------------------- vignettes/web_only/FOCUS_Z.rmd | 255 +++++++++++++++++++++++++++++++ vignettes/web_only/NAFTA_examples.Rmd | 243 ----------------------------- vignettes/web_only/NAFTA_examples.rmd | 243 +++++++++++++++++++++++++++++ vignettes/web_only/benchmarks.Rmd | 150 ------------------ vignettes/web_only/benchmarks.html | 136 +++++++---------- vignettes/web_only/benchmarks.rmd | 157 +++++++++++++++++++ vignettes/web_only/compiled_models.Rmd | 141 ----------------- vignettes/web_only/compiled_models.rmd | 141 +++++++++++++++++ vignettes/web_only/mkin_benchmarks.rda | Bin 854 -> 854 bytes 21 files changed, 1580 insertions(+), 1621 deletions(-) delete mode 100644 vignettes/FOCUS_D.Rmd create mode 100644 vignettes/FOCUS_D.rmd delete mode 100644 vignettes/FOCUS_L.Rmd create mode 100644 vignettes/FOCUS_L.rmd delete mode 100644 vignettes/mkin.Rmd create mode 100644 vignettes/mkin.rmd delete mode 100644 vignettes/twa.Rmd create mode 100644 vignettes/twa.rmd delete mode 100644 vignettes/web_only/FOCUS_Z.Rmd create mode 100644 vignettes/web_only/FOCUS_Z.rmd delete mode 100644 vignettes/web_only/NAFTA_examples.Rmd create mode 100644 vignettes/web_only/NAFTA_examples.rmd delete mode 100644 vignettes/web_only/benchmarks.Rmd create mode 100644 vignettes/web_only/benchmarks.rmd delete mode 100644 vignettes/web_only/compiled_models.Rmd create mode 100644 vignettes/web_only/compiled_models.rmd diff --git a/GNUmakefile b/GNUmakefile index abc115af..d2476218 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -89,15 +89,15 @@ testcheck: test check README.html: README.md "$(RBIN)/Rscript" -e "rmarkdown::render('README.md', output_format = 'html_document', output_options = list(mathjax = NULL))" -vignettes/%.html: vignettes/mkin_vignettes.css vignettes/references.bib vignettes/%.Rmd - "$(RBIN)/Rscript" -e "tools::buildVignette(file = 'vignettes/$*.Rmd', dir = 'vignettes')" +vignettes/%.html: vignettes/mkin_vignettes.css vignettes/references.bib vignettes/%.rmd + "$(RBIN)/Rscript" -e "tools::buildVignette(file = 'vignettes/$*.rmd', dir = 'vignettes')" vignettes: vignettes/mkin.html vignettes/FOCUS_D.html vignettes/FOCUS_L.html vignettes/twa.html -vignettes/web_only/%.html: vignettes/references.bib vignettes/web_only/%.Rmd - "$(RBIN)/Rscript" -e "tools::buildVignette(file = 'vignettes/web_only/$*.Rmd', dir = 'vignettes/web_only')" +vignettes/web_only/%.html: vignettes/references.bib vignettes/web_only/%.rmd + "$(RBIN)/Rscript" -e "tools::buildVignette(file = 'vignettes/web_only/$*.rmd', dir = 'vignettes/web_only')" -articles: vignettes/web_only/FOCUS_Z.html vignettes/web_only/compiled_models.html +articles: vignettes/web_only/FOCUS_Z.html vignettes/web_only/compiled_models.html vignettes/web_only/benchmarks.html pd: roxygen "$(RBIN)/Rscript" -e "pkgdown::build_site(run_dont_run = TRUE, lazy = TRUE)" diff --git a/docs/articles/web_only/benchmarks.html b/docs/articles/web_only/benchmarks.html index b0bb5196..202ec25f 100644 --- a/docs/articles/web_only/benchmarks.html +++ b/docs/articles/web_only/benchmarks.html @@ -100,38 +100,21 @@

Benchmark timings for mkin

Johannes Ranke

-

2020-05-12

+

2020-05-13

- Source: vignettes/web_only/benchmarks.Rmd - + Source: vignettes/web_only/benchmarks.rmd + -

Each system is characterized by its CPU type, the operating system type and the mkin version. Currently only values for one system are available.

-
cpu_model <- benchmarkme::get_cpu()$model_name
-operating_system <- Sys.info()[["sysname"]]
-mkin_version <- as.character(packageVersion("mkin"))
-system_string <- paste0(operating_system, ", ", cpu_model, ", mkin version ", mkin_version)
-load("~/git/mkin/vignettes/web_only/mkin_benchmarks.rda")
-mkin_benchmarks[system_string, c("CPU", "OS", "mkin")] <-
-  c(cpu_model, operating_system, mkin_version)
-
-if (mkin_version > "0.9.48.1") {
-  mmkin_bench <- function(models, datasets, error_model = "const") {
-    mmkin(models, datasets, error_model = error_model, cores = 1, quiet = TRUE)
-  }
-} else {
-  mmkin_bench <- function(models, datasets, error_model = NULL) {
-    mmkin(models, datasets, reweight.method = error_model, cores = 1, quiet = TRUE)
-  }
-}
+

Each system is characterized by its CPU type, the operating system type and the mkin version. Currently only values for one system are available. A compiler was available, so if no analytical solution was available, compiled ODE models are used.

Test cases

Parent only:

-
FOCUS_C <- FOCUS_2006_C
+
FOCUS_C <- FOCUS_2006_C
 FOCUS_D <- subset(FOCUS_2006_D, value != 0)
 parent_datasets <- list(FOCUS_C, FOCUS_D)
 
@@ -139,25 +122,22 @@
 t2 <- system.time(mmkin_bench(c("SFO", "FOMC", "DFOP", "HS"), parent_datasets,
     error_model = "tc"))[["elapsed"]]

One metabolite:

-
SFO_SFO <- mkinmod(
+
SFO_SFO <- mkinmod(
   parent = mkinsub("SFO", "m1"),
-  m1 = mkinsub("SFO"))
-
## Successfully compiled differential equation model from auto-generated C code.
-
FOMC_SFO <- mkinmod(
+  m1 = mkinsub("SFO"))
+FOMC_SFO <- mkinmod(
   parent = mkinsub("FOMC", "m1"),
-  m1 = mkinsub("SFO"))
-
## Successfully compiled differential equation model from auto-generated C code.
-
DFOP_SFO <- mkinmod(
+  m1 = mkinsub("SFO"))
+DFOP_SFO <- mkinmod(
   parent = mkinsub("FOMC", "m1"),
-  m1 = mkinsub("SFO"))
-
## Successfully compiled differential equation model from auto-generated C code.
-
t3 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D)))[["elapsed"]]
+  m1 = mkinsub("SFO"))
+t3 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D)))[["elapsed"]]
 t4 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D),
     error_model = "tc"))[["elapsed"]]
 t5 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D),
     error_model = "obs"))[["elapsed"]]

Two metabolites, synthetic data:

-
m_synth_SFO_lin <- mkinmod(parent = mkinsub("SFO", "M1"),
+
m_synth_SFO_lin <- mkinmod(parent = mkinsub("SFO", "M1"),
                            M1 = mkinsub("SFO", "M2"),
                            M2 = mkinsub("SFO"),
                            use_of_ff = "max", quiet = TRUE)
@@ -183,57 +163,55 @@
     error_model = "obs"))[["elapsed"]]
 t11 <- system.time(mmkin_bench(list(m_synth_DFOP_par), list(DFOP_par_c),
     error_model = "obs"))[["elapsed"]]
-
mkin_benchmarks[system_string, paste0("t", 1:11)] <-
+
mkin_benchmarks[system_string, paste0("t", 1:11)] <-
   c(t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
 save(mkin_benchmarks, file = "~/git/mkin/vignettes/web_only/mkin_benchmarks.rda")

Results

-

Currently, we only have benchmark information on one system, therefore only the mkin version is shown with the results below. Timings are in seconds, shorter is better.

-
rownames(mkin_benchmarks) <- as.character(mkin_benchmarks$mkin)
+

Currently, we only have benchmark information on one system, therefore only the mkin version is shown with the results below. Timings are in seconds, shorter is better. All results were obtained by serial, i.e. not using multiple computing cores.

Benchmarks for all available error models are shown.

Parent only

-

Constant variance and two-component error model:

-
kable(mkin_benchmarks[, c("t1", "t2")])
+

Constant variance (t1) and two-component error model (t2) for four models fitted to two datasets, i.e. eight fits for each test.

- - - + + + - + - + - + - + - + - - - + + +
t1t2mkin versiont1 [s]t2 [s]
0.9.48.10.9.48.1 3.610 11.019
0.9.49.10.9.49.1 8.184 22.889
0.9.49.20.9.49.2 7.064 12.558
0.9.49.30.9.49.3 7.296 21.239
0.9.49.40.9.49.4 5.936 20.545
0.9.50.21.6593.9000.9.50.21.7043.909
@@ -241,51 +219,50 @@

One metabolite

-

Constant variance, variance by variable and two-component error model:

-
kable(mkin_benchmarks[, c("t3", "t4", "t5")])
+

Constant variance (t3), two-component error model (t4), and variance by variable (t5) for three models fitted to one dataset, i.e. three fits for each test.

- - - - + + + + - + - + - + - + - + - - - - + + + +
t3t4t5mkin versiont3 [s]t4 [s]t5 [s]
0.9.48.10.9.48.1 3.764 14.347 9.495
0.9.49.10.9.49.1 4.649 13.789 6.395
0.9.49.20.9.49.2 4.786 8.461 5.675
0.9.49.30.9.49.3 4.510 13.805 7.386
0.9.49.40.9.49.4 4.446 15.335 6.002
0.9.50.21.3896.1062.7160.9.50.21.3706.1472.747
@@ -293,21 +270,20 @@

Two metabolites

-

Two different datasets, for each constant variance, variance by variable and two-component error model are shown:

-
kable(mkin_benchmarks[, paste0("t", 6:11)])
+

Constant variance (t6 and t7), two-component error model (t8 and t9), and variance by variable (t10 and t11) for one model fitted to one dataset, i.e. one fit for each test.

- - - - - - - + + + + + + + - + @@ -316,7 +292,7 @@ - + @@ -325,7 +301,7 @@ - + @@ -334,7 +310,7 @@ - + @@ -343,7 +319,7 @@ - + @@ -352,13 +328,13 @@ - - - - - - - + + + + + + +
t6t7t8t9t10t11mkin versiont6 [s]t7 [s]t8 [s]t9 [s]t10 [s]t11 [s]
0.9.48.10.9.48.1 2.623 4.587 7.52531.267
0.9.49.10.9.49.1 2.542 4.128 4.6325.636
0.9.49.20.9.49.2 2.723 4.478 4.8625.574
0.9.49.30.9.49.3 2.643 4.374 7.0207.365
0.9.49.40.9.49.4 2.635 4.259 4.7375.626
0.9.50.20.7511.2091.2702.8592.0262.9630.9.50.20.7611.2131.2872.8862.0542.984
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index cd82d482..922e556c 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -10,7 +10,7 @@ articles: NAFTA_examples: web_only/NAFTA_examples.html benchmarks: web_only/benchmarks.html compiled_models: web_only/compiled_models.html -last_built: 2020-05-12T17:10Z +last_built: 2020-05-13T14:16Z urls: reference: https://pkgdown.jrwb.de/mkin/reference article: https://pkgdown.jrwb.de/mkin/articles diff --git a/vignettes/FOCUS_D.Rmd b/vignettes/FOCUS_D.Rmd deleted file mode 100644 index 02c2c467..00000000 --- a/vignettes/FOCUS_D.Rmd +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Example evaluation of FOCUS Example Dataset D -author: Johannes Ranke -date: "`r Sys.Date()`" -output: -rmarkdown::html_vignette: - mathjax: null -vignette: > - %\VignetteIndexEntry{Example evaluation of FOCUS Example Dataset D} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE, message = FALSE} -library(knitr) -opts_chunk$set(tidy = FALSE, cache = FALSE) -library(mkin) -``` - -This is just a very simple vignette showing how to fit a degradation model for a parent -compound with one transformation product using `mkin`. After loading the -library we look at the data. We have observed concentrations in the column named -`value` at the times specified in column `time` for the two observed variables -named `parent` and `m1`. - - -```{r data} -library(mkin, quietly = TRUE) -print(FOCUS_2006_D) -``` - -Next we specify the degradation model: The parent compound degrades with simple first-order -kinetics (SFO) to one metabolite named m1, which also degrades with SFO kinetics. - -The call to mkinmod returns a degradation model. The differential equations represented in -R code can be found in the character vector `$diffs` of the `mkinmod` object. If -a C compiler (gcc) is installed and functional, the differential equation model will -be compiled from auto-generated C code. - -```{r model} -SFO_SFO <- mkinmod(parent = mkinsub("SFO", "m1"), m1 = mkinsub("SFO")) -print(SFO_SFO$diffs) -``` - -We do the fitting without progress report (`quiet = TRUE`). - - -```{r fit} -fit <- mkinfit(SFO_SFO, FOCUS_2006_D, quiet = TRUE) -``` - -A plot of the fit including a residual plot for both observed variables is obtained -using the `plot_sep` method for `mkinfit` objects, which shows separate graphs for -all compounds and their residuals. - -```{r plot, fig.height = 6, fig.width = 8} -plot_sep(fit, lpos = c("topright", "bottomright")) -``` - -Confidence intervals for the parameter estimates are obtained using the `mkinparplot` function. - - -```{r plot_2, fig.height = 4, fig.width = 8} -mkinparplot(fit) -``` - -A comprehensive report of the results is obtained using the `summary` method for `mkinfit` -objects. - - -```{r} -summary(fit) -``` diff --git a/vignettes/FOCUS_D.rmd b/vignettes/FOCUS_D.rmd new file mode 100644 index 00000000..b857a3c3 --- /dev/null +++ b/vignettes/FOCUS_D.rmd @@ -0,0 +1,73 @@ +--- +title: Example evaluation of FOCUS Example Dataset D +author: Johannes Ranke +date: "`r Sys.Date()`" +output: +rmarkdown::html_vignette: + mathjax: null +vignette: > + %\VignetteIndexEntry{Example evaluation of FOCUS Example Dataset D} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE, message = FALSE} +library(knitr) +opts_chunk$set(tidy = FALSE, cache = FALSE) +library(mkin) +``` + +This is just a very simple vignette showing how to fit a degradation model for a parent +compound with one transformation product using `mkin`. After loading the +library we look at the data. We have observed concentrations in the column named +`value` at the times specified in column `time` for the two observed variables +named `parent` and `m1`. + + +```{r data} +library(mkin, quietly = TRUE) +print(FOCUS_2006_D) +``` + +Next we specify the degradation model: The parent compound degrades with simple first-order +kinetics (SFO) to one metabolite named m1, which also degrades with SFO kinetics. + +The call to mkinmod returns a degradation model. The differential equations represented in +R code can be found in the character vector `$diffs` of the `mkinmod` object. If +a C compiler (gcc) is installed and functional, the differential equation model will +be compiled from auto-generated C code. + +```{r model} +SFO_SFO <- mkinmod(parent = mkinsub("SFO", "m1"), m1 = mkinsub("SFO")) +print(SFO_SFO$diffs) +``` + +We do the fitting without progress report (`quiet = TRUE`). + + +```{r fit} +fit <- mkinfit(SFO_SFO, FOCUS_2006_D, quiet = TRUE) +``` + +A plot of the fit including a residual plot for both observed variables is obtained +using the `plot_sep` method for `mkinfit` objects, which shows separate graphs for +all compounds and their residuals. + +```{r plot, fig.height = 6, fig.width = 8} +plot_sep(fit, lpos = c("topright", "bottomright")) +``` + +Confidence intervals for the parameter estimates are obtained using the `mkinparplot` function. + + +```{r plot_2, fig.height = 4, fig.width = 8} +mkinparplot(fit) +``` + +A comprehensive report of the results is obtained using the `summary` method for `mkinfit` +objects. + + +```{r} +summary(fit) +``` diff --git a/vignettes/FOCUS_L.Rmd b/vignettes/FOCUS_L.Rmd deleted file mode 100644 index fa6155d2..00000000 --- a/vignettes/FOCUS_L.Rmd +++ /dev/null @@ -1,271 +0,0 @@ ---- -title: "Example evaluation of FOCUS Laboratory Data L1 to L3" -author: "Johannes Ranke" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: - collapsed: false - mathjax: null - fig_retina: null -references: -- id: ranke2014 - title: Prüfung und Validierung von Modellierungssoftware als Alternative zu - ModelMaker 4.0 - author: - - family: Ranke - given: Johannes - type: report - issued: - year: 2014 - number: "Umweltbundesamt Projektnummer 27452" -vignette: > - %\VignetteIndexEntry{Example evaluation of FOCUS Laboratory Data L1 to L3} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -library(knitr) -opts_chunk$set(tidy = FALSE, cache = FALSE) -``` - -# Laboratory Data L1 - -The following code defines example dataset L1 from the FOCUS kinetics -report, p. 284: - -```{r} -library("mkin", quietly = TRUE) -FOCUS_2006_L1 = data.frame( - t = rep(c(0, 1, 2, 3, 5, 7, 14, 21, 30), each = 2), - parent = c(88.3, 91.4, 85.6, 84.5, 78.9, 77.6, - 72.0, 71.9, 50.3, 59.4, 47.0, 45.1, - 27.7, 27.3, 10.0, 10.4, 2.9, 4.0)) -FOCUS_2006_L1_mkin <- mkin_wide_to_long(FOCUS_2006_L1) -``` - -Here we use the assumptions of simple first order (SFO), the case of declining -rate constant over time (FOMC) and the case of two different phases of the -kinetics (DFOP). For a more detailed discussion of the models, please see the -FOCUS kinetics report. - -Since mkin version 0.9-32 (July 2014), we can use shorthand notation like `"SFO"` -for parent only degradation models. The following two lines fit the model and -produce the summary report of the model fit. This covers the numerical analysis -given in the FOCUS report. - -```{r} -m.L1.SFO <- mkinfit("SFO", FOCUS_2006_L1_mkin, quiet = TRUE) -summary(m.L1.SFO) -``` - -A plot of the fit is obtained with the plot function for mkinfit objects. - -```{r fig.width = 6, fig.height = 5} -plot(m.L1.SFO, show_errmin = TRUE, main = "FOCUS L1 - SFO") -``` - -The residual plot can be easily obtained by - -```{r fig.width = 6, fig.height = 5} -mkinresplot(m.L1.SFO, ylab = "Observed", xlab = "Time") -``` - -For comparison, the FOMC model is fitted as well, and the $\chi^2$ error level -is checked. - -```{r fig.width = 6, fig.height = 5} -m.L1.FOMC <- mkinfit("FOMC", FOCUS_2006_L1_mkin, quiet=TRUE) -plot(m.L1.FOMC, show_errmin = TRUE, main = "FOCUS L1 - FOMC") -summary(m.L1.FOMC, data = FALSE) -``` - -We get a warning that the default optimisation algorithm `Port` did not converge, which -is an indication that the model is overparameterised, *i.e.* contains too many -parameters that are ill-defined as a consequence. - -And in fact, due to the higher number of parameters, and the lower number of -degrees of freedom of the fit, the $\chi^2$ error level is actually higher for -the FOMC model (3.6%) than for the SFO model (3.4%). Additionally, the -parameters `log_alpha` and `log_beta` internally fitted in the model have -excessive confidence intervals, that span more than 25 orders of magnitude (!) -when backtransformed to the scale of `alpha` and `beta`. Also, the t-test -for significant difference from zero does not indicate such a significant difference, -with p-values greater than 0.1, and finally, the parameter correlation of `log_alpha` -and `log_beta` is 1.000, clearly indicating that the model is overparameterised. - -The $\chi^2$ error levels reported in Appendix 3 and Appendix 7 to the FOCUS -kinetics report are rounded to integer percentages and partly deviate by one -percentage point from the results calculated by mkin. The reason for -this is not known. However, mkin gives the same $\chi^2$ error levels -as the kinfit package and the calculation routines of the kinfit package have -been extensively compared to the results obtained by the KinGUI -software, as documented in the kinfit package vignette. KinGUI was the first -widely used standard package in this field. Also, the calculation of -$\chi^2$ error levels was compared with KinGUII, CAKE and DegKin manager in -a project sponsored by the German Umweltbundesamt [@ranke2014]. - -# Laboratory Data L2 - -The following code defines example dataset L2 from the FOCUS kinetics -report, p. 287: - -```{r} -FOCUS_2006_L2 = data.frame( - t = rep(c(0, 1, 3, 7, 14, 28), each = 2), - parent = c(96.1, 91.8, 41.4, 38.7, - 19.3, 22.3, 4.6, 4.6, - 2.6, 1.2, 0.3, 0.6)) -FOCUS_2006_L2_mkin <- mkin_wide_to_long(FOCUS_2006_L2) -``` - -## SFO fit for L2 - -Again, the SFO model is fitted and the result is plotted. The residual plot -can be obtained simply by adding the argument `show_residuals` to the plot -command. - -```{r fig.width = 7, fig.height = 6} -m.L2.SFO <- mkinfit("SFO", FOCUS_2006_L2_mkin, quiet=TRUE) -plot(m.L2.SFO, show_residuals = TRUE, show_errmin = TRUE, - main = "FOCUS L2 - SFO") -``` - -The $\chi^2$ error level of 14% suggests that the model does not fit very well. -This is also obvious from the plots of the fit, in which we have included -the residual plot. - -In the FOCUS kinetics report, it is stated that there is no apparent systematic -error observed from the residual plot up to the measured DT90 (approximately at -day 5), and there is an underestimation beyond that point. - -We may add that it is difficult to judge the random nature of the residuals just -from the three samplings at days 0, 1 and 3. Also, it is not clear _a -priori_ why a consistent underestimation after the approximate DT90 should be -irrelevant. However, this can be rationalised by the fact that the FOCUS fate -models generally only implement SFO kinetics. - -## FOMC fit for L2 - -For comparison, the FOMC model is fitted as well, and the $\chi^2$ error level -is checked. - -```{r fig.width = 7, fig.height = 6} -m.L2.FOMC <- mkinfit("FOMC", FOCUS_2006_L2_mkin, quiet = TRUE) -plot(m.L2.FOMC, show_residuals = TRUE, - main = "FOCUS L2 - FOMC") -summary(m.L2.FOMC, data = FALSE) -``` - -The error level at which the $\chi^2$ test passes is much lower in this case. -Therefore, the FOMC model provides a better description of the data, as less -experimental error has to be assumed in order to explain the data. - -## DFOP fit for L2 - -Fitting the four parameter DFOP model further reduces the $\chi^2$ error level. - -```{r fig.width = 7, fig.height = 6} -m.L2.DFOP <- mkinfit("DFOP", FOCUS_2006_L2_mkin, quiet = TRUE) -plot(m.L2.DFOP, show_residuals = TRUE, show_errmin = TRUE, - main = "FOCUS L2 - DFOP") -summary(m.L2.DFOP, data = FALSE) -``` - -Here, the DFOP model is clearly the best-fit model for dataset L2 based on the -chi^2 error level criterion. However, the failure to calculate the covariance -matrix indicates that the parameter estimates correlate excessively. Therefore, -the FOMC model may be preferred for this dataset. - -# Laboratory Data L3 - -The following code defines example dataset L3 from the FOCUS kinetics report, -p. 290. - -```{r} -FOCUS_2006_L3 = data.frame( - t = c(0, 3, 7, 14, 30, 60, 91, 120), - parent = c(97.8, 60, 51, 43, 35, 22, 15, 12)) -FOCUS_2006_L3_mkin <- mkin_wide_to_long(FOCUS_2006_L3) -``` - -## Fit multiple models - -As of mkin version 0.9-39 (June 2015), we can fit several models to -one or more datasets in one call to the function `mmkin`. The datasets -have to be passed in a list, in this case a named list holding only -the L3 dataset prepared above. - -```{r fig.height = 8} -# Only use one core here, not to offend the CRAN checks -mm.L3 <- mmkin(c("SFO", "FOMC", "DFOP"), cores = 1, - list("FOCUS L3" = FOCUS_2006_L3_mkin), quiet = TRUE) -plot(mm.L3) -``` - -The $\chi^2$ error level of 21% as well as the plot suggest that the SFO model -does not fit very well. The FOMC model performs better, with an -error level at which the $\chi^2$ test passes of 7%. Fitting the four -parameter DFOP model further reduces the $\chi^2$ error level -considerably. - -## Accessing mmkin objects - -The objects returned by mmkin are arranged like a matrix, with -models as a row index and datasets as a column index. - -We can extract the summary and plot for *e.g.* the DFOP fit, -using square brackets for indexing which will result in the use of -the summary and plot functions working on mkinfit objects. - -```{r fig.height = 5} -summary(mm.L3[["DFOP", 1]]) -plot(mm.L3[["DFOP", 1]], show_errmin = TRUE) -``` - -Here, a look to the model plot, the confidence intervals of the parameters -and the correlation matrix suggest that the parameter estimates are reliable, and -the DFOP model can be used as the best-fit model based on the $\chi^2$ error -level criterion for laboratory data L3. - -This is also an example where the standard t-test for the parameter `g_ilr` is -misleading, as it tests for a significant difference from zero. In this case, -zero appears to be the correct value for this parameter, and the confidence -interval for the backtransformed parameter `g` is quite narrow. - -# Laboratory Data L4 - -The following code defines example dataset L4 from the FOCUS kinetics -report, p. 293: - -```{r} -FOCUS_2006_L4 = data.frame( - t = c(0, 3, 7, 14, 30, 60, 91, 120), - parent = c(96.6, 96.3, 94.3, 88.8, 74.9, 59.9, 53.5, 49.0)) -FOCUS_2006_L4_mkin <- mkin_wide_to_long(FOCUS_2006_L4) -``` - -Fits of the SFO and FOMC models, plots and summaries are produced below: - -```{r fig.height = 6} -# Only use one core here, not to offend the CRAN checks -mm.L4 <- mmkin(c("SFO", "FOMC"), cores = 1, - list("FOCUS L4" = FOCUS_2006_L4_mkin), - quiet = TRUE) -plot(mm.L4) -``` - -The $\chi^2$ error level of 3.3% as well as the plot suggest that the SFO model -fits very well. The error level at which the $\chi^2$ test passes is slightly -lower for the FOMC model. However, the difference appears negligible. - - -```{r fig.height = 8} -summary(mm.L4[["SFO", 1]], data = FALSE) -summary(mm.L4[["FOMC", 1]], data = FALSE) -``` - - -# References diff --git a/vignettes/FOCUS_L.rmd b/vignettes/FOCUS_L.rmd new file mode 100644 index 00000000..e017b674 --- /dev/null +++ b/vignettes/FOCUS_L.rmd @@ -0,0 +1,271 @@ +--- +title: "Example evaluation of FOCUS Laboratory Data L1 to L3" +author: "Johannes Ranke" +date: "`r Sys.Date()`" +output: + html_document: + toc: true + toc_float: + collapsed: false + mathjax: null + fig_retina: null +references: +- id: ranke2014 + title: Prüfung und Validierung von Modellierungssoftware als Alternative zu + ModelMaker 4.0 + author: + - family: Ranke + given: Johannes + type: report + issued: + year: 2014 + number: "Umweltbundesamt Projektnummer 27452" +vignette: > + %\VignetteIndexEntry{Example evaluation of FOCUS Laboratory Data L1 to L3} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +library(knitr) +opts_chunk$set(tidy = FALSE, cache = FALSE) +``` + +# Laboratory Data L1 + +The following code defines example dataset L1 from the FOCUS kinetics +report, p. 284: + +```{r} +library("mkin", quietly = TRUE) +FOCUS_2006_L1 = data.frame( + t = rep(c(0, 1, 2, 3, 5, 7, 14, 21, 30), each = 2), + parent = c(88.3, 91.4, 85.6, 84.5, 78.9, 77.6, + 72.0, 71.9, 50.3, 59.4, 47.0, 45.1, + 27.7, 27.3, 10.0, 10.4, 2.9, 4.0)) +FOCUS_2006_L1_mkin <- mkin_wide_to_long(FOCUS_2006_L1) +``` + +Here we use the assumptions of simple first order (SFO), the case of declining +rate constant over time (FOMC) and the case of two different phases of the +kinetics (DFOP). For a more detailed discussion of the models, please see the +FOCUS kinetics report. + +Since mkin version 0.9-32 (July 2014), we can use shorthand notation like `"SFO"` +for parent only degradation models. The following two lines fit the model and +produce the summary report of the model fit. This covers the numerical analysis +given in the FOCUS report. + +```{r} +m.L1.SFO <- mkinfit("SFO", FOCUS_2006_L1_mkin, quiet = TRUE) +summary(m.L1.SFO) +``` + +A plot of the fit is obtained with the plot function for mkinfit objects. + +```{r fig.width = 6, fig.height = 5} +plot(m.L1.SFO, show_errmin = TRUE, main = "FOCUS L1 - SFO") +``` + +The residual plot can be easily obtained by + +```{r fig.width = 6, fig.height = 5} +mkinresplot(m.L1.SFO, ylab = "Observed", xlab = "Time") +``` + +For comparison, the FOMC model is fitted as well, and the $\chi^2$ error level +is checked. + +```{r fig.width = 6, fig.height = 5} +m.L1.FOMC <- mkinfit("FOMC", FOCUS_2006_L1_mkin, quiet=TRUE) +plot(m.L1.FOMC, show_errmin = TRUE, main = "FOCUS L1 - FOMC") +summary(m.L1.FOMC, data = FALSE) +``` + +We get a warning that the default optimisation algorithm `Port` did not converge, which +is an indication that the model is overparameterised, *i.e.* contains too many +parameters that are ill-defined as a consequence. + +And in fact, due to the higher number of parameters, and the lower number of +degrees of freedom of the fit, the $\chi^2$ error level is actually higher for +the FOMC model (3.6%) than for the SFO model (3.4%). Additionally, the +parameters `log_alpha` and `log_beta` internally fitted in the model have +excessive confidence intervals, that span more than 25 orders of magnitude (!) +when backtransformed to the scale of `alpha` and `beta`. Also, the t-test +for significant difference from zero does not indicate such a significant difference, +with p-values greater than 0.1, and finally, the parameter correlation of `log_alpha` +and `log_beta` is 1.000, clearly indicating that the model is overparameterised. + +The $\chi^2$ error levels reported in Appendix 3 and Appendix 7 to the FOCUS +kinetics report are rounded to integer percentages and partly deviate by one +percentage point from the results calculated by mkin. The reason for +this is not known. However, mkin gives the same $\chi^2$ error levels +as the kinfit package and the calculation routines of the kinfit package have +been extensively compared to the results obtained by the KinGUI +software, as documented in the kinfit package vignette. KinGUI was the first +widely used standard package in this field. Also, the calculation of +$\chi^2$ error levels was compared with KinGUII, CAKE and DegKin manager in +a project sponsored by the German Umweltbundesamt [@ranke2014]. + +# Laboratory Data L2 + +The following code defines example dataset L2 from the FOCUS kinetics +report, p. 287: + +```{r} +FOCUS_2006_L2 = data.frame( + t = rep(c(0, 1, 3, 7, 14, 28), each = 2), + parent = c(96.1, 91.8, 41.4, 38.7, + 19.3, 22.3, 4.6, 4.6, + 2.6, 1.2, 0.3, 0.6)) +FOCUS_2006_L2_mkin <- mkin_wide_to_long(FOCUS_2006_L2) +``` + +## SFO fit for L2 + +Again, the SFO model is fitted and the result is plotted. The residual plot +can be obtained simply by adding the argument `show_residuals` to the plot +command. + +```{r fig.width = 7, fig.height = 6} +m.L2.SFO <- mkinfit("SFO", FOCUS_2006_L2_mkin, quiet=TRUE) +plot(m.L2.SFO, show_residuals = TRUE, show_errmin = TRUE, + main = "FOCUS L2 - SFO") +``` + +The $\chi^2$ error level of 14% suggests that the model does not fit very well. +This is also obvious from the plots of the fit, in which we have included +the residual plot. + +In the FOCUS kinetics report, it is stated that there is no apparent systematic +error observed from the residual plot up to the measured DT90 (approximately at +day 5), and there is an underestimation beyond that point. + +We may add that it is difficult to judge the random nature of the residuals just +from the three samplings at days 0, 1 and 3. Also, it is not clear _a +priori_ why a consistent underestimation after the approximate DT90 should be +irrelevant. However, this can be rationalised by the fact that the FOCUS fate +models generally only implement SFO kinetics. + +## FOMC fit for L2 + +For comparison, the FOMC model is fitted as well, and the $\chi^2$ error level +is checked. + +```{r fig.width = 7, fig.height = 6} +m.L2.FOMC <- mkinfit("FOMC", FOCUS_2006_L2_mkin, quiet = TRUE) +plot(m.L2.FOMC, show_residuals = TRUE, + main = "FOCUS L2 - FOMC") +summary(m.L2.FOMC, data = FALSE) +``` + +The error level at which the $\chi^2$ test passes is much lower in this case. +Therefore, the FOMC model provides a better description of the data, as less +experimental error has to be assumed in order to explain the data. + +## DFOP fit for L2 + +Fitting the four parameter DFOP model further reduces the $\chi^2$ error level. + +```{r fig.width = 7, fig.height = 6} +m.L2.DFOP <- mkinfit("DFOP", FOCUS_2006_L2_mkin, quiet = TRUE) +plot(m.L2.DFOP, show_residuals = TRUE, show_errmin = TRUE, + main = "FOCUS L2 - DFOP") +summary(m.L2.DFOP, data = FALSE) +``` + +Here, the DFOP model is clearly the best-fit model for dataset L2 based on the +chi^2 error level criterion. However, the failure to calculate the covariance +matrix indicates that the parameter estimates correlate excessively. Therefore, +the FOMC model may be preferred for this dataset. + +# Laboratory Data L3 + +The following code defines example dataset L3 from the FOCUS kinetics report, +p. 290. + +```{r} +FOCUS_2006_L3 = data.frame( + t = c(0, 3, 7, 14, 30, 60, 91, 120), + parent = c(97.8, 60, 51, 43, 35, 22, 15, 12)) +FOCUS_2006_L3_mkin <- mkin_wide_to_long(FOCUS_2006_L3) +``` + +## Fit multiple models + +As of mkin version 0.9-39 (June 2015), we can fit several models to +one or more datasets in one call to the function `mmkin`. The datasets +have to be passed in a list, in this case a named list holding only +the L3 dataset prepared above. + +```{r fig.height = 8} +# Only use one core here, not to offend the CRAN checks +mm.L3 <- mmkin(c("SFO", "FOMC", "DFOP"), cores = 1, + list("FOCUS L3" = FOCUS_2006_L3_mkin), quiet = TRUE) +plot(mm.L3) +``` + +The $\chi^2$ error level of 21% as well as the plot suggest that the SFO model +does not fit very well. The FOMC model performs better, with an +error level at which the $\chi^2$ test passes of 7%. Fitting the four +parameter DFOP model further reduces the $\chi^2$ error level +considerably. + +## Accessing mmkin objects + +The objects returned by mmkin are arranged like a matrix, with +models as a row index and datasets as a column index. + +We can extract the summary and plot for *e.g.* the DFOP fit, +using square brackets for indexing which will result in the use of +the summary and plot functions working on mkinfit objects. + +```{r fig.height = 5} +summary(mm.L3[["DFOP", 1]]) +plot(mm.L3[["DFOP", 1]], show_errmin = TRUE) +``` + +Here, a look to the model plot, the confidence intervals of the parameters +and the correlation matrix suggest that the parameter estimates are reliable, and +the DFOP model can be used as the best-fit model based on the $\chi^2$ error +level criterion for laboratory data L3. + +This is also an example where the standard t-test for the parameter `g_ilr` is +misleading, as it tests for a significant difference from zero. In this case, +zero appears to be the correct value for this parameter, and the confidence +interval for the backtransformed parameter `g` is quite narrow. + +# Laboratory Data L4 + +The following code defines example dataset L4 from the FOCUS kinetics +report, p. 293: + +```{r} +FOCUS_2006_L4 = data.frame( + t = c(0, 3, 7, 14, 30, 60, 91, 120), + parent = c(96.6, 96.3, 94.3, 88.8, 74.9, 59.9, 53.5, 49.0)) +FOCUS_2006_L4_mkin <- mkin_wide_to_long(FOCUS_2006_L4) +``` + +Fits of the SFO and FOMC models, plots and summaries are produced below: + +```{r fig.height = 6} +# Only use one core here, not to offend the CRAN checks +mm.L4 <- mmkin(c("SFO", "FOMC"), cores = 1, + list("FOCUS L4" = FOCUS_2006_L4_mkin), + quiet = TRUE) +plot(mm.L4) +``` + +The $\chi^2$ error level of 3.3% as well as the plot suggest that the SFO model +fits very well. The error level at which the $\chi^2$ test passes is slightly +lower for the FOMC model. However, the difference appears negligible. + + +```{r fig.height = 8} +summary(mm.L4[["SFO", 1]], data = FALSE) +summary(mm.L4[["FOMC", 1]], data = FALSE) +``` + + +# References diff --git a/vignettes/mkin.Rmd b/vignettes/mkin.Rmd deleted file mode 100644 index acca0e44..00000000 --- a/vignettes/mkin.Rmd +++ /dev/null @@ -1,238 +0,0 @@ ---- -title: Introduction to mkin -author: Johannes Ranke -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: true - code_folding: hide - fig_retina: null -bibliography: references.bib -vignette: > - %\VignetteEngine{knitr::rmarkdown} - %\VignetteIndexEntry{mkin - Kinetic evaluation of chemical degradation data} - %\VignetteEncoding{UTF-8} ---- - -[Wissenschaftlicher Berater, Kronacher Str. 12, 79639 Grenzach-Wyhlen, Germany](http://www.jrwb.de)
-[Privatdozent at the University of Bremen](http://chem.uft.uni-bremen.de/ranke) - -```{r, include = FALSE} -require(knitr) -opts_chunk$set(engine='R', tidy=FALSE) -``` - -# Abstract -In the regulatory evaluation of chemical substances like plant protection -products (pesticides), biocides and other chemicals, degradation data play an -important role. For the evaluation of pesticide degradation experiments, -detailed guidance has been developed, based on nonlinear optimisation. -The `R` add-on package `mkin` implements fitting some of the models -recommended in this guidance from within R and calculates some statistical -measures for data series within one or more compartments, for parent and -metabolites. - -```{r, echo = TRUE, cache = TRUE, fig = TRUE, fig.width = 8, fig.height = 7} -library("mkin", quietly = TRUE) -# Define the kinetic model -m_SFO_SFO_SFO <- mkinmod(parent = mkinsub("SFO", "M1"), - M1 = mkinsub("SFO", "M2"), - M2 = mkinsub("SFO"), - use_of_ff = "max", quiet = TRUE) - - -# Produce model predictions using some arbitrary parameters -sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) -d_SFO_SFO_SFO <- mkinpredict(m_SFO_SFO_SFO, - c(k_parent = 0.03, - f_parent_to_M1 = 0.5, k_M1 = log(2)/100, - f_M1_to_M2 = 0.9, k_M2 = log(2)/50), - c(parent = 100, M1 = 0, M2 = 0), - sampling_times) - -# Generate a dataset by adding normally distributed errors with -# standard deviation 3, for two replicates at each sampling time -d_SFO_SFO_SFO_err <- add_err(d_SFO_SFO_SFO, reps = 2, - sdfunc = function(x) 3, - n = 1, seed = 123456789 ) - -# Fit the model to the dataset -f_SFO_SFO_SFO <- mkinfit(m_SFO_SFO_SFO, d_SFO_SFO_SFO_err[[1]], quiet = TRUE) - -# Plot the results separately for parent and metabolites -plot_sep(f_SFO_SFO_SFO, lpos = c("topright", "bottomright", "bottomright")) -``` - -# Background - -Many approaches are possible regarding the evaluation of chemical degradation -data. - -The `mkin` package [@pkg:mkin] implements the approach recommended in the -kinetics report provided by the FOrum for Co-ordination of pesticide fate -models and their USe [@FOCUS2006; -@FOCUSkinetics2014] -for simple decline data series, data series with transformation products, -commonly termed metabolites, and for data series for more than one compartment. -It is also possible to include back reactions, so equilibrium reactions and -equilibrium partitioning can be specified, although this oftentimes leads to an -overparameterisation of the model. - -When the first `mkin` code was published in 2010, the most commonly used tools for -fitting more complex kinetic degradation models to experimental data were -KinGUI [@schaefer2007], a MATLAB based tool with a graphical user -interface that was specifically tailored to the task and included some output -as proposed by the FOCUS Kinetics Workgroup, and ModelMaker, a general purpose -compartment based tool providing infrastructure for fitting dynamic simulation -models based on differential equations to data. - -The code was first uploaded to the BerliOS platform. When this was taken down, -the version control history was imported into the R-Forge site (see *e.g.* -[the initial commit on 11 May 2010](http://cgit.jrwb.de/mkin/commit/?id=30cbb4092f6d2d3beff5800603374a0d009ad770)), -where the code is still occasionally updated. - -At that time, the R package `FME` (Flexible Modelling Environment) -[@soetaert2010] was already available, and provided a good basis for -developing a package specifically tailored to the task. The remaining challenge -was to make it as easy as possible for the users (including the author of this -vignette) to specify the system of differential equations and to include the -output requested by the FOCUS guidance, such as the relative standard deviation -that has to be assumed for the residuals, such that the $\chi^2$ -goodness-of-fit test as defined by the FOCUS kinetics workgroup would pass -using an significance level $\alpha$ of 0.05. This relative error, expressed -as a percentage, is often termed $\chi^2$ error level or similar. - -Also, `mkin` introduced using analytical solutions for parent only kinetics for -improved optimization speed. Later, Eigenvalue based solutions were -introduced to `mkin` for the case of linear differential equations (*i.e.* -where the FOMC or DFOP models were not used for the parent compound), greatly -improving the optimization speed for these cases. This, however, has become -somehow obsolete, as the use of compiled code described below gives even -smaller execution times. - -The possibility to specify back-reactions and a biphasic model (SFORB) for -metabolites were present in `mkin` from the very beginning. - -## Derived software tools - -Soon after the publication of `mkin`, two derived tools were published, namely -KinGUII (available from Bayer Crop Science) and CAKE (commissioned to Tessella -by Syngenta), which added a graphical user interface (GUI), and added fitting by -iteratively reweighted least squares (IRLS) and characterisation of likely -parameter distributions by Markov Chain Monte Carlo (MCMC) sampling. - -CAKE focuses on a smooth use experience, sacrificing some flexibility in the model -definition, originally allowing only two primary metabolites in parallel. -The current version 3.3 of CAKE release in March 2016 uses a basic scheme for -up to six metabolites in a flexible arrangement, but does not support -back-reactions (non-instantaneous equilibria) or biphasic kinetics for metabolites. - -KinGUI offers an even more flexible widget for specifying complex kinetic -models. Back-reactions (non-instantaneous equilibria) were supported early on, -but until 2014, only simple first-order models could be specified for -transformation products. Starting with KinGUII version 2.1, biphasic modelling -of metabolites was also available in KinGUII. - -A further graphical user interface (GUI) that has recently been brought to a decent -degree of maturity is the browser based GUI named `gmkin`. Please see its -[documentation page](https://pkgdown.jrwb.de/gmkin) and -[manual](https://pkgdown.jrwb.de/gmkin/articles/gmkin_manual.html) -for further information. - -A comparison of scope, usability and numerical results obtained with these -tools has been recently been published by @ranke2018. - -## Recent developments - -Currently (July 2019), the main features available in `mkin` which are not -present in KinGUII or CAKE, are the speed increase by using compiled code when -a compiler is present, parallel model fitting on multicore machines using the -`mmkin` function, and the estimation of parameter confidence intervals based on -transformed parameters. - -In addition, the possibility to use two alternative error models to constant -variance have been integrated. The variance by variable error model introduced -by @gao11 has been available via an iteratively reweighted least squares (IRLS) -procedure since mkin -[version 0.9-22](https://pkgdown.jrwb.de/mkin/news/index.html#mkin-0-9-22-2013-10-26). -With [release 0.9.49.5](https://pkgdown.jrwb.de/mkin/news/index.html#mkin-0-9-49-5-2019-07-04), -the IRLS algorithm has been replaced by direct or step-wise maximisation of -the likelihood function, which makes it possible not only to fit the -variance by variable error model but also a -[two-component error model](https://pkgdown.jrwb.de/mkin/reference/sigma_twocomp.html) -inspired by error models developed in analytical chemistry. - -# Internal parameter transformations - -For rate constants, the log -transformation is used, as proposed by Bates and Watts [-@bates1988, p. 77, -149]. Approximate intervals are constructed for the transformed rate -constants [compare @bates1988, p. 135], *i.e.* for their logarithms. -Confidence intervals for the rate constants are then obtained using the -appropriate backtransformation using the exponential function. - -In the first version of `mkin` allowing for specifying models using -formation fractions, a home-made reparameterisation was used in order to ensure -that the sum of formation fractions would not exceed unity. - -This method is still used in the current version of KinGUII (v2.1 from April -2014), with a modification that allows for fixing the pathway to sink to zero. -CAKE uses penalties in the objective function in order to enforce this -constraint. - -In 2012, an alternative reparameterisation of the formation fractions was -proposed together with René Lehmann [@ranke2012], based on isometric -logratio transformation (ILR). The aim was to improve the validity of the -linear approximation of the objective function during the parameter -estimation procedure as well as in the subsequent calculation of parameter -confidence intervals. - -## Confidence intervals based on transformed parameters - -In the first attempt at providing improved parameter confidence intervals -introduced to `mkin` in 2013, confidence intervals obtained from -FME on the transformed parameters were simply all backtransformed one by one -to yield asymmetric confidence intervals for the backtransformed parameters. - -However, while there is a 1:1 relation between the rate constants in the model -and the transformed parameters fitted in the model, the parameters obtained by the -isometric logratio transformation are calculated from the set of formation -fractions that quantify the paths to each of the compounds formed from a -specific parent compound, and no such 1:1 relation exists. - -Therefore, parameter confidence intervals for formation fractions obtained with -this method only appear valid for the case of a single transformation product, where -only one formation fraction is to be estimated, directly corresponding to one -component of the ilr transformed parameter. - -The confidence intervals obtained by backtransformation for the cases where a -1:1 relation between transformed and original parameter exist are considered by -the author of this vignette to be more accurate than those obtained using a -re-estimation of the Hessian matrix after backtransformation, as implemented -in the FME package. - -## Parameter t-test based on untransformed parameters - -The standard output of many nonlinear regression software packages includes -the results from a test for significant difference from zero for all parameters. -Such a test is also recommended to check the validity of rate constants in the -FOCUS guidance [@FOCUSkinetics2014, p. 96ff]. - -It has been argued that the precondition for this test, *i.e.* normal distribution -of the estimator for the parameters, is not fulfilled in the case of nonlinear regression -[@ranke2015]. However, this test is commonly used by industry, consultants and -national authorities in order to decide on the reliability of parameter estimates, based -on the FOCUS guidance mentioned above. Therefore, the results of this one-sided -t-test are included in the summary output from `mkin`. - -As it is not reasonable to test for significant difference of the transformed -parameters (*e.g.* $log(k)$) from zero, the t-test is calculated based on the -model definition before parameter transformation, *i.e.* in a similar way as in -packages that do not apply such an internal parameter transformation. A note -is included in the `mkin` output, pointing to the fact that the t-test is based -on the unjustified assumption of normal distribution of the parameter -estimators. - -# References - - diff --git a/vignettes/mkin.rmd b/vignettes/mkin.rmd new file mode 100644 index 00000000..acca0e44 --- /dev/null +++ b/vignettes/mkin.rmd @@ -0,0 +1,238 @@ +--- +title: Introduction to mkin +author: Johannes Ranke +date: "`r Sys.Date()`" +output: + html_document: + toc: true + toc_float: true + code_folding: hide + fig_retina: null +bibliography: references.bib +vignette: > + %\VignetteEngine{knitr::rmarkdown} + %\VignetteIndexEntry{mkin - Kinetic evaluation of chemical degradation data} + %\VignetteEncoding{UTF-8} +--- + +[Wissenschaftlicher Berater, Kronacher Str. 12, 79639 Grenzach-Wyhlen, Germany](http://www.jrwb.de)
+[Privatdozent at the University of Bremen](http://chem.uft.uni-bremen.de/ranke) + +```{r, include = FALSE} +require(knitr) +opts_chunk$set(engine='R', tidy=FALSE) +``` + +# Abstract +In the regulatory evaluation of chemical substances like plant protection +products (pesticides), biocides and other chemicals, degradation data play an +important role. For the evaluation of pesticide degradation experiments, +detailed guidance has been developed, based on nonlinear optimisation. +The `R` add-on package `mkin` implements fitting some of the models +recommended in this guidance from within R and calculates some statistical +measures for data series within one or more compartments, for parent and +metabolites. + +```{r, echo = TRUE, cache = TRUE, fig = TRUE, fig.width = 8, fig.height = 7} +library("mkin", quietly = TRUE) +# Define the kinetic model +m_SFO_SFO_SFO <- mkinmod(parent = mkinsub("SFO", "M1"), + M1 = mkinsub("SFO", "M2"), + M2 = mkinsub("SFO"), + use_of_ff = "max", quiet = TRUE) + + +# Produce model predictions using some arbitrary parameters +sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) +d_SFO_SFO_SFO <- mkinpredict(m_SFO_SFO_SFO, + c(k_parent = 0.03, + f_parent_to_M1 = 0.5, k_M1 = log(2)/100, + f_M1_to_M2 = 0.9, k_M2 = log(2)/50), + c(parent = 100, M1 = 0, M2 = 0), + sampling_times) + +# Generate a dataset by adding normally distributed errors with +# standard deviation 3, for two replicates at each sampling time +d_SFO_SFO_SFO_err <- add_err(d_SFO_SFO_SFO, reps = 2, + sdfunc = function(x) 3, + n = 1, seed = 123456789 ) + +# Fit the model to the dataset +f_SFO_SFO_SFO <- mkinfit(m_SFO_SFO_SFO, d_SFO_SFO_SFO_err[[1]], quiet = TRUE) + +# Plot the results separately for parent and metabolites +plot_sep(f_SFO_SFO_SFO, lpos = c("topright", "bottomright", "bottomright")) +``` + +# Background + +Many approaches are possible regarding the evaluation of chemical degradation +data. + +The `mkin` package [@pkg:mkin] implements the approach recommended in the +kinetics report provided by the FOrum for Co-ordination of pesticide fate +models and their USe [@FOCUS2006; -@FOCUSkinetics2014] +for simple decline data series, data series with transformation products, +commonly termed metabolites, and for data series for more than one compartment. +It is also possible to include back reactions, so equilibrium reactions and +equilibrium partitioning can be specified, although this oftentimes leads to an +overparameterisation of the model. + +When the first `mkin` code was published in 2010, the most commonly used tools for +fitting more complex kinetic degradation models to experimental data were +KinGUI [@schaefer2007], a MATLAB based tool with a graphical user +interface that was specifically tailored to the task and included some output +as proposed by the FOCUS Kinetics Workgroup, and ModelMaker, a general purpose +compartment based tool providing infrastructure for fitting dynamic simulation +models based on differential equations to data. + +The code was first uploaded to the BerliOS platform. When this was taken down, +the version control history was imported into the R-Forge site (see *e.g.* +[the initial commit on 11 May 2010](http://cgit.jrwb.de/mkin/commit/?id=30cbb4092f6d2d3beff5800603374a0d009ad770)), +where the code is still occasionally updated. + +At that time, the R package `FME` (Flexible Modelling Environment) +[@soetaert2010] was already available, and provided a good basis for +developing a package specifically tailored to the task. The remaining challenge +was to make it as easy as possible for the users (including the author of this +vignette) to specify the system of differential equations and to include the +output requested by the FOCUS guidance, such as the relative standard deviation +that has to be assumed for the residuals, such that the $\chi^2$ +goodness-of-fit test as defined by the FOCUS kinetics workgroup would pass +using an significance level $\alpha$ of 0.05. This relative error, expressed +as a percentage, is often termed $\chi^2$ error level or similar. + +Also, `mkin` introduced using analytical solutions for parent only kinetics for +improved optimization speed. Later, Eigenvalue based solutions were +introduced to `mkin` for the case of linear differential equations (*i.e.* +where the FOMC or DFOP models were not used for the parent compound), greatly +improving the optimization speed for these cases. This, however, has become +somehow obsolete, as the use of compiled code described below gives even +smaller execution times. + +The possibility to specify back-reactions and a biphasic model (SFORB) for +metabolites were present in `mkin` from the very beginning. + +## Derived software tools + +Soon after the publication of `mkin`, two derived tools were published, namely +KinGUII (available from Bayer Crop Science) and CAKE (commissioned to Tessella +by Syngenta), which added a graphical user interface (GUI), and added fitting by +iteratively reweighted least squares (IRLS) and characterisation of likely +parameter distributions by Markov Chain Monte Carlo (MCMC) sampling. + +CAKE focuses on a smooth use experience, sacrificing some flexibility in the model +definition, originally allowing only two primary metabolites in parallel. +The current version 3.3 of CAKE release in March 2016 uses a basic scheme for +up to six metabolites in a flexible arrangement, but does not support +back-reactions (non-instantaneous equilibria) or biphasic kinetics for metabolites. + +KinGUI offers an even more flexible widget for specifying complex kinetic +models. Back-reactions (non-instantaneous equilibria) were supported early on, +but until 2014, only simple first-order models could be specified for +transformation products. Starting with KinGUII version 2.1, biphasic modelling +of metabolites was also available in KinGUII. + +A further graphical user interface (GUI) that has recently been brought to a decent +degree of maturity is the browser based GUI named `gmkin`. Please see its +[documentation page](https://pkgdown.jrwb.de/gmkin) and +[manual](https://pkgdown.jrwb.de/gmkin/articles/gmkin_manual.html) +for further information. + +A comparison of scope, usability and numerical results obtained with these +tools has been recently been published by @ranke2018. + +## Recent developments + +Currently (July 2019), the main features available in `mkin` which are not +present in KinGUII or CAKE, are the speed increase by using compiled code when +a compiler is present, parallel model fitting on multicore machines using the +`mmkin` function, and the estimation of parameter confidence intervals based on +transformed parameters. + +In addition, the possibility to use two alternative error models to constant +variance have been integrated. The variance by variable error model introduced +by @gao11 has been available via an iteratively reweighted least squares (IRLS) +procedure since mkin +[version 0.9-22](https://pkgdown.jrwb.de/mkin/news/index.html#mkin-0-9-22-2013-10-26). +With [release 0.9.49.5](https://pkgdown.jrwb.de/mkin/news/index.html#mkin-0-9-49-5-2019-07-04), +the IRLS algorithm has been replaced by direct or step-wise maximisation of +the likelihood function, which makes it possible not only to fit the +variance by variable error model but also a +[two-component error model](https://pkgdown.jrwb.de/mkin/reference/sigma_twocomp.html) +inspired by error models developed in analytical chemistry. + +# Internal parameter transformations + +For rate constants, the log +transformation is used, as proposed by Bates and Watts [-@bates1988, p. 77, +149]. Approximate intervals are constructed for the transformed rate +constants [compare @bates1988, p. 135], *i.e.* for their logarithms. +Confidence intervals for the rate constants are then obtained using the +appropriate backtransformation using the exponential function. + +In the first version of `mkin` allowing for specifying models using +formation fractions, a home-made reparameterisation was used in order to ensure +that the sum of formation fractions would not exceed unity. + +This method is still used in the current version of KinGUII (v2.1 from April +2014), with a modification that allows for fixing the pathway to sink to zero. +CAKE uses penalties in the objective function in order to enforce this +constraint. + +In 2012, an alternative reparameterisation of the formation fractions was +proposed together with René Lehmann [@ranke2012], based on isometric +logratio transformation (ILR). The aim was to improve the validity of the +linear approximation of the objective function during the parameter +estimation procedure as well as in the subsequent calculation of parameter +confidence intervals. + +## Confidence intervals based on transformed parameters + +In the first attempt at providing improved parameter confidence intervals +introduced to `mkin` in 2013, confidence intervals obtained from +FME on the transformed parameters were simply all backtransformed one by one +to yield asymmetric confidence intervals for the backtransformed parameters. + +However, while there is a 1:1 relation between the rate constants in the model +and the transformed parameters fitted in the model, the parameters obtained by the +isometric logratio transformation are calculated from the set of formation +fractions that quantify the paths to each of the compounds formed from a +specific parent compound, and no such 1:1 relation exists. + +Therefore, parameter confidence intervals for formation fractions obtained with +this method only appear valid for the case of a single transformation product, where +only one formation fraction is to be estimated, directly corresponding to one +component of the ilr transformed parameter. + +The confidence intervals obtained by backtransformation for the cases where a +1:1 relation between transformed and original parameter exist are considered by +the author of this vignette to be more accurate than those obtained using a +re-estimation of the Hessian matrix after backtransformation, as implemented +in the FME package. + +## Parameter t-test based on untransformed parameters + +The standard output of many nonlinear regression software packages includes +the results from a test for significant difference from zero for all parameters. +Such a test is also recommended to check the validity of rate constants in the +FOCUS guidance [@FOCUSkinetics2014, p. 96ff]. + +It has been argued that the precondition for this test, *i.e.* normal distribution +of the estimator for the parameters, is not fulfilled in the case of nonlinear regression +[@ranke2015]. However, this test is commonly used by industry, consultants and +national authorities in order to decide on the reliability of parameter estimates, based +on the FOCUS guidance mentioned above. Therefore, the results of this one-sided +t-test are included in the summary output from `mkin`. + +As it is not reasonable to test for significant difference of the transformed +parameters (*e.g.* $log(k)$) from zero, the t-test is calculated based on the +model definition before parameter transformation, *i.e.* in a similar way as in +packages that do not apply such an internal parameter transformation. A note +is included in the `mkin` output, pointing to the fact that the t-test is based +on the unjustified assumption of normal distribution of the parameter +estimators. + +# References + + diff --git a/vignettes/twa.Rmd b/vignettes/twa.Rmd deleted file mode 100644 index 6f283eb2..00000000 --- a/vignettes/twa.Rmd +++ /dev/null @@ -1,79 +0,0 @@ ---- -title: Calculation of time weighted average concentrations with mkin -author: Johannes Ranke -date: "`r Sys.Date()`" -bibliography: references.bib -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Calculation of time weighted average concentrations with mkin} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -Since version 0.9.45.1 of the 'mkin' package, a function for calculating -time weighted average concentrations for decline kinetics (*i.e.* only -for the compound applied in the experiment) is included. Strictly -speaking, they are maximum moving window time weighted average concentrations, -*i.e.* the maximum time weighted average concentration that can be found -when moving a time window of a specified width over the decline curve. - -Time weighted average concentrations for the SFO, FOMC and the DFOP model are -calculated using the formulas given in the FOCUS kinetics guidance -[@FOCUSkinetics2014, p. 251]: - -SFO: - -$$c_\textrm{twa} = c_0 \frac{\left( 1 - e^{- k t} \right)}{ k t} $$ - -FOMC: - -$$c_\textrm{twa} = c_0 \frac{\beta}{t (1 - \alpha)} - \left( \left(\frac{t}{\beta} + 1 \right)^{1 - \alpha} - 1 \right) $$ - -DFOP: - -$$c_\textrm{twa} = \frac{c_0}{t} \left( - \frac{g}{k_1} \left( 1 - e^{- k_1 t} \right) + - \frac{1-g}{k_2} \left( 1 - e^{- k_2 t} \right) \right) $$ - -HS for $t > t_b$: - -$$c_\textrm{twa} = \frac{c_0}{t} \left( - \frac{1}{k_1} \left( 1 - e^{- k_1 t_b} \right) + - \frac{e^{- k_1 t_b}}{k_2} \left( 1 - e^{- k_2 (t - t_b)} \right) \right) $$ - -Often, the ratio between the time weighted average concentration $c_\textrm{twa}$ -and the initial concentration $c_0$ - -$$f_\textrm{twa} = \frac{c_\textrm{twa}}{c_0}$$ - -is needed. This can be calculated from the fitted initial concentration $c_0$ and -the time weighted average concentration $c_\textrm{twa}$, or directly from -the model parameters using the following formulas: - -SFO: - -$$f_\textrm{twa} = \frac{\left( 1 - e^{- k t} \right)}{k t} $$ - -FOMC: - -$$f_\textrm{twa} = \frac{\beta}{t (1 - \alpha)} - \left( \left(\frac{t}{\beta} + 1 \right)^{1 - \alpha} - 1 \right) $$ - -DFOP: - -$$f_\textrm{twa} = \frac{1}{t} \left( - \frac{g}{k_1} \left( 1 - e^{- k_1 t} \right) + - \frac{1-g}{k_2} \left( 1 - e^{- k_2 t} \right) \right) $$ - -HS for $t > t_b$: - -$$f_\textrm{twa} = \frac{1}{t} \left( - \frac{1}{k_1} \left( 1 - e^{- k_1 t_b} \right) + - \frac{e^{- k_1 t_b}}{k_2} \left( 1 - e^{- k_2 (t - t_b)} \right) \right) $$ - -Note that a method for calculating maximum moving window time weighted average -concentrations for a model fitted by 'mkinfit' or from parent decline model -parameters is included in the `max_twa_parent()` function. If the same is -needed for metabolites, the function `pfm::max_twa()` from the 'pfm' package -can be used. diff --git a/vignettes/twa.rmd b/vignettes/twa.rmd new file mode 100644 index 00000000..6f283eb2 --- /dev/null +++ b/vignettes/twa.rmd @@ -0,0 +1,79 @@ +--- +title: Calculation of time weighted average concentrations with mkin +author: Johannes Ranke +date: "`r Sys.Date()`" +bibliography: references.bib +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Calculation of time weighted average concentrations with mkin} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +Since version 0.9.45.1 of the 'mkin' package, a function for calculating +time weighted average concentrations for decline kinetics (*i.e.* only +for the compound applied in the experiment) is included. Strictly +speaking, they are maximum moving window time weighted average concentrations, +*i.e.* the maximum time weighted average concentration that can be found +when moving a time window of a specified width over the decline curve. + +Time weighted average concentrations for the SFO, FOMC and the DFOP model are +calculated using the formulas given in the FOCUS kinetics guidance +[@FOCUSkinetics2014, p. 251]: + +SFO: + +$$c_\textrm{twa} = c_0 \frac{\left( 1 - e^{- k t} \right)}{ k t} $$ + +FOMC: + +$$c_\textrm{twa} = c_0 \frac{\beta}{t (1 - \alpha)} + \left( \left(\frac{t}{\beta} + 1 \right)^{1 - \alpha} - 1 \right) $$ + +DFOP: + +$$c_\textrm{twa} = \frac{c_0}{t} \left( + \frac{g}{k_1} \left( 1 - e^{- k_1 t} \right) + + \frac{1-g}{k_2} \left( 1 - e^{- k_2 t} \right) \right) $$ + +HS for $t > t_b$: + +$$c_\textrm{twa} = \frac{c_0}{t} \left( + \frac{1}{k_1} \left( 1 - e^{- k_1 t_b} \right) + + \frac{e^{- k_1 t_b}}{k_2} \left( 1 - e^{- k_2 (t - t_b)} \right) \right) $$ + +Often, the ratio between the time weighted average concentration $c_\textrm{twa}$ +and the initial concentration $c_0$ + +$$f_\textrm{twa} = \frac{c_\textrm{twa}}{c_0}$$ + +is needed. This can be calculated from the fitted initial concentration $c_0$ and +the time weighted average concentration $c_\textrm{twa}$, or directly from +the model parameters using the following formulas: + +SFO: + +$$f_\textrm{twa} = \frac{\left( 1 - e^{- k t} \right)}{k t} $$ + +FOMC: + +$$f_\textrm{twa} = \frac{\beta}{t (1 - \alpha)} + \left( \left(\frac{t}{\beta} + 1 \right)^{1 - \alpha} - 1 \right) $$ + +DFOP: + +$$f_\textrm{twa} = \frac{1}{t} \left( + \frac{g}{k_1} \left( 1 - e^{- k_1 t} \right) + + \frac{1-g}{k_2} \left( 1 - e^{- k_2 t} \right) \right) $$ + +HS for $t > t_b$: + +$$f_\textrm{twa} = \frac{1}{t} \left( + \frac{1}{k_1} \left( 1 - e^{- k_1 t_b} \right) + + \frac{e^{- k_1 t_b}}{k_2} \left( 1 - e^{- k_2 (t - t_b)} \right) \right) $$ + +Note that a method for calculating maximum moving window time weighted average +concentrations for a model fitted by 'mkinfit' or from parent decline model +parameters is included in the `max_twa_parent()` function. If the same is +needed for metabolites, the function `pfm::max_twa()` from the 'pfm' package +can be used. diff --git a/vignettes/web_only/FOCUS_Z.Rmd b/vignettes/web_only/FOCUS_Z.Rmd deleted file mode 100644 index 2da7fde7..00000000 --- a/vignettes/web_only/FOCUS_Z.Rmd +++ /dev/null @@ -1,255 +0,0 @@ ---- -title: Example evaluation of FOCUS dataset Z -author: Johannes Ranke -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: true - code_folding: show - fig_retina: null -bibliography: ../references.bib -vignette: > - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -[Wissenschaftlicher Berater, Kronacher Str. 12, 79639 Grenzach-Wyhlen, Germany](http://www.jrwb.de)
-[Privatdozent at the University of Bremen](http://chem.uft.uni-bremen.de/ranke) - -```{r, include = FALSE} -require(knitr) -options(digits = 5) -opts_chunk$set(engine='R', tidy = FALSE) -``` - -# The data - -The following code defines the example dataset from Appendix 7 to the FOCUS kinetics -report [@FOCUSkinetics2014, p. 354]. - -```{r, echo = TRUE, fig = TRUE, fig.width = 8, fig.height = 7} -library(mkin, quietly = TRUE) -LOD = 0.5 -FOCUS_2006_Z = data.frame( - t = c(0, 0.04, 0.125, 0.29, 0.54, 1, 2, 3, 4, 7, 10, 14, 21, - 42, 61, 96, 124), - Z0 = c(100, 81.7, 70.4, 51.1, 41.2, 6.6, 4.6, 3.9, 4.6, 4.3, 6.8, - 2.9, 3.5, 5.3, 4.4, 1.2, 0.7), - Z1 = c(0, 18.3, 29.6, 46.3, 55.1, 65.7, 39.1, 36, 15.3, 5.6, 1.1, - 1.6, 0.6, 0.5 * LOD, NA, NA, NA), - Z2 = c(0, NA, 0.5 * LOD, 2.6, 3.8, 15.3, 37.2, 31.7, 35.6, 14.5, - 0.8, 2.1, 1.9, 0.5 * LOD, NA, NA, NA), - Z3 = c(0, NA, NA, NA, NA, 0.5 * LOD, 9.2, 13.1, 22.3, 28.4, 32.5, - 25.2, 17.2, 4.8, 4.5, 2.8, 4.4)) - -FOCUS_2006_Z_mkin <- mkin_wide_to_long(FOCUS_2006_Z) -``` - -# Parent and one metabolite - -The next step is to set up the models used for the kinetic analysis. As the -simultaneous fit of parent and the first metabolite is usually straightforward, -Step 1 (SFO for parent only) is skipped here. We start with the model 2a, -with formation and decline of metabolite Z1 and the pathway from parent -directly to sink included (default in mkin). - -```{r FOCUS_2006_Z_fits_1, echo=TRUE, fig.height=6} -Z.2a <- mkinmod(Z0 = mkinsub("SFO", "Z1"), - Z1 = mkinsub("SFO")) -m.Z.2a <- mkinfit(Z.2a, FOCUS_2006_Z_mkin, quiet = TRUE) -plot_sep(m.Z.2a) -summary(m.Z.2a, data = FALSE)$bpar -``` - -As obvious from the parameter summary (the \texttt{bpar} component of the -summary), the kinetic rate constant from parent compound Z to sink -is very small and the t-test for this parameter suggests that it is -not significantly different from zero. This suggests, in agreement with the -analysis in the FOCUS kinetics report, to simplify the model by removing the -pathway to sink. - -A similar result can be obtained when formation fractions are used in the model -formulation: - -```{r FOCUS_2006_Z_fits_2, echo=TRUE, fig.height=6} -Z.2a.ff <- mkinmod(Z0 = mkinsub("SFO", "Z1"), - Z1 = mkinsub("SFO"), - use_of_ff = "max") - -m.Z.2a.ff <- mkinfit(Z.2a.ff, FOCUS_2006_Z_mkin, quiet = TRUE) -plot_sep(m.Z.2a.ff) -summary(m.Z.2a.ff, data = FALSE)$bpar -``` - -Here, the ilr transformed formation fraction fitted in the model takes a very -large value, and the backtransformed formation fraction from parent Z to Z1 is -practically unity. Here, the covariance matrix used for the calculation -of confidence intervals is not returned as the model is -overparameterised. - -A simplified model is obtained by removing the pathway to the sink. -\footnote{If the model formulation without formation fractions -is used, the same effect can be obtained by fixing the parameter \texttt{k\_Z\_sink} -to a value of zero.} - -In the following, we use the parameterisation with formation fractions in order -to be able to compare with the results in the FOCUS guidance, and as it -makes it easier to use parameters obtained in a previous fit when adding a further -metabolite. - -```{r FOCUS_2006_Z_fits_3, echo=TRUE, fig.height=6} -Z.3 <- mkinmod(Z0 = mkinsub("SFO", "Z1", sink = FALSE), - Z1 = mkinsub("SFO"), use_of_ff = "max") -m.Z.3 <- mkinfit(Z.3, FOCUS_2006_Z_mkin, quiet = TRUE) -plot_sep(m.Z.3) -summary(m.Z.3, data = FALSE)$bpar -``` - -As there is only one transformation product for Z0 and no pathway -to sink, the formation fraction is internally fixed to unity. - -# Metabolites Z2 and Z3 - -As suggested in the FOCUS report, the pathway to sink was removed for metabolite Z1 as -well in the next step. While this step appears questionable on the basis of the above results, it -is followed here for the purpose of comparison. Also, in the FOCUS report, it is -assumed that there is additional empirical evidence that Z1 quickly and exclusively -hydrolyses to Z2. - -```{r FOCUS_2006_Z_fits_5, echo=TRUE, fig.height=7} -Z.5 <- mkinmod(Z0 = mkinsub("SFO", "Z1", sink = FALSE), - Z1 = mkinsub("SFO", "Z2", sink = FALSE), - Z2 = mkinsub("SFO"), use_of_ff = "max") -m.Z.5 <- mkinfit(Z.5, FOCUS_2006_Z_mkin, quiet = TRUE) -plot_sep(m.Z.5) -``` - -Finally, metabolite Z3 is added to the model. We use the optimised -differential equation parameter values from the previous fit in order to -accelerate the optimization. - -```{r FOCUS_2006_Z_fits_6, echo=TRUE, fig.height=8} -Z.FOCUS <- mkinmod(Z0 = mkinsub("SFO", "Z1", sink = FALSE), - Z1 = mkinsub("SFO", "Z2", sink = FALSE), - Z2 = mkinsub("SFO", "Z3"), - Z3 = mkinsub("SFO"), - use_of_ff = "max") -m.Z.FOCUS <- mkinfit(Z.FOCUS, FOCUS_2006_Z_mkin, - parms.ini = m.Z.5$bparms.ode, - quiet = TRUE) -plot_sep(m.Z.FOCUS) -summary(m.Z.FOCUS, data = FALSE)$bpar -endpoints(m.Z.FOCUS) -``` - -This fit corresponds to the final result chosen in Appendix 7 of the FOCUS -report. Confidence intervals returned by mkin are based on internally -transformed parameters, however. - -# Using the SFORB model - -As the FOCUS report states, there is a certain tailing of the time course of metabolite -Z3. Also, the time course of the parent compound is not fitted very well using the -SFO model, as residues at a certain low level remain. - -Therefore, an additional model is offered here, using the single first-order -reversible binding (SFORB) model for metabolite Z3. As expected, the $\chi^2$ -error level is lower for metabolite Z3 using this model and the graphical -fit for Z3 is improved. However, the covariance matrix is not returned. - -```{r FOCUS_2006_Z_fits_7, echo=TRUE, fig.height=8} -Z.mkin.1 <- mkinmod(Z0 = mkinsub("SFO", "Z1", sink = FALSE), - Z1 = mkinsub("SFO", "Z2", sink = FALSE), - Z2 = mkinsub("SFO", "Z3"), - Z3 = mkinsub("SFORB")) -m.Z.mkin.1 <- mkinfit(Z.mkin.1, FOCUS_2006_Z_mkin, quiet = TRUE) -plot_sep(m.Z.mkin.1) -summary(m.Z.mkin.1, data = FALSE)$cov.unscaled -``` - -Therefore, a further stepwise model building is performed starting from the -stage of parent and two metabolites, starting from the assumption that the model -fit for the parent compound can be improved by using the SFORB model. - -```{r FOCUS_2006_Z_fits_9, echo=TRUE, fig.height=8} -Z.mkin.3 <- mkinmod(Z0 = mkinsub("SFORB", "Z1", sink = FALSE), - Z1 = mkinsub("SFO", "Z2", sink = FALSE), - Z2 = mkinsub("SFO")) -m.Z.mkin.3 <- mkinfit(Z.mkin.3, FOCUS_2006_Z_mkin, quiet = TRUE) -plot_sep(m.Z.mkin.3) -``` - -This results in a much better representation of the behaviour of the parent -compound Z0. - -Finally, Z3 is added as well. These models appear overparameterised (no -covariance matrix returned) if the sink for Z1 is left in the models. - -```{r FOCUS_2006_Z_fits_10, echo=TRUE, fig.height=8} -Z.mkin.4 <- mkinmod(Z0 = mkinsub("SFORB", "Z1", sink = FALSE), - Z1 = mkinsub("SFO", "Z2", sink = FALSE), - Z2 = mkinsub("SFO", "Z3"), - Z3 = mkinsub("SFO")) -m.Z.mkin.4 <- mkinfit(Z.mkin.4, FOCUS_2006_Z_mkin, - parms.ini = m.Z.mkin.3$bparms.ode, - quiet = TRUE) -plot_sep(m.Z.mkin.4) -``` - -The error level of the fit, but especially of metabolite Z3, can be improved if -the SFORB model is chosen for this metabolite, as this model is capable of -representing the tailing of the metabolite decline phase. - -```{r FOCUS_2006_Z_fits_11, echo=TRUE, fig.height=8} -Z.mkin.5 <- mkinmod(Z0 = mkinsub("SFORB", "Z1", sink = FALSE), - Z1 = mkinsub("SFO", "Z2", sink = FALSE), - Z2 = mkinsub("SFO", "Z3"), - Z3 = mkinsub("SFORB")) -m.Z.mkin.5 <- mkinfit(Z.mkin.5, FOCUS_2006_Z_mkin, - parms.ini = m.Z.mkin.4$bparms.ode[1:4], - quiet = TRUE) -plot_sep(m.Z.mkin.5) -``` - -The summary view of the backtransformed parameters shows that we get no -confidence intervals due to overparameterisation. As the optimized -\texttt{k\_Z3\_bound\_free} is excessively small, it seems reasonable to fix it to -zero. - -```{r FOCUS_2006_Z_fits_11a, echo=TRUE} -m.Z.mkin.5a <- mkinfit(Z.mkin.5, FOCUS_2006_Z_mkin, - parms.ini = c(m.Z.mkin.5$bparms.ode[1:7], - k_Z3_bound_free = 0), - fixed_parms = "k_Z3_bound_free", - quiet = TRUE) -plot_sep(m.Z.mkin.5a) -``` - -As expected, the residual plots for Z0 and Z3 are more random than in the case of the -all SFO model for which they were shown above. In conclusion, the model -\texttt{Z.mkin.5a} is proposed as the best-fit model for the dataset from -Appendix 7 of the FOCUS report. - -A graphical representation of the confidence intervals can finally be obtained. - -```{r FOCUS_2006_Z_fits_11b, echo=TRUE} -mkinparplot(m.Z.mkin.5a) -``` - -The endpoints obtained with this model are - -```{r FOCUS_2006_Z_fits_11b_endpoints, echo=TRUE} -endpoints(m.Z.mkin.5a) -``` - -It is clear the degradation rate of Z3 towards the end of the experiment -is very low as DT50\_Z3\_b2 (the second Eigenvalue of the system of two differential -equations representing the SFORB system for Z3, corresponding to the slower rate -constant of the DFOP model) is reported to be infinity. However, this appears -to be a feature of the data. - - -# References - - diff --git a/vignettes/web_only/FOCUS_Z.rmd b/vignettes/web_only/FOCUS_Z.rmd new file mode 100644 index 00000000..2da7fde7 --- /dev/null +++ b/vignettes/web_only/FOCUS_Z.rmd @@ -0,0 +1,255 @@ +--- +title: Example evaluation of FOCUS dataset Z +author: Johannes Ranke +date: "`r Sys.Date()`" +output: + html_document: + toc: true + toc_float: true + code_folding: show + fig_retina: null +bibliography: ../references.bib +vignette: > + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +[Wissenschaftlicher Berater, Kronacher Str. 12, 79639 Grenzach-Wyhlen, Germany](http://www.jrwb.de)
+[Privatdozent at the University of Bremen](http://chem.uft.uni-bremen.de/ranke) + +```{r, include = FALSE} +require(knitr) +options(digits = 5) +opts_chunk$set(engine='R', tidy = FALSE) +``` + +# The data + +The following code defines the example dataset from Appendix 7 to the FOCUS kinetics +report [@FOCUSkinetics2014, p. 354]. + +```{r, echo = TRUE, fig = TRUE, fig.width = 8, fig.height = 7} +library(mkin, quietly = TRUE) +LOD = 0.5 +FOCUS_2006_Z = data.frame( + t = c(0, 0.04, 0.125, 0.29, 0.54, 1, 2, 3, 4, 7, 10, 14, 21, + 42, 61, 96, 124), + Z0 = c(100, 81.7, 70.4, 51.1, 41.2, 6.6, 4.6, 3.9, 4.6, 4.3, 6.8, + 2.9, 3.5, 5.3, 4.4, 1.2, 0.7), + Z1 = c(0, 18.3, 29.6, 46.3, 55.1, 65.7, 39.1, 36, 15.3, 5.6, 1.1, + 1.6, 0.6, 0.5 * LOD, NA, NA, NA), + Z2 = c(0, NA, 0.5 * LOD, 2.6, 3.8, 15.3, 37.2, 31.7, 35.6, 14.5, + 0.8, 2.1, 1.9, 0.5 * LOD, NA, NA, NA), + Z3 = c(0, NA, NA, NA, NA, 0.5 * LOD, 9.2, 13.1, 22.3, 28.4, 32.5, + 25.2, 17.2, 4.8, 4.5, 2.8, 4.4)) + +FOCUS_2006_Z_mkin <- mkin_wide_to_long(FOCUS_2006_Z) +``` + +# Parent and one metabolite + +The next step is to set up the models used for the kinetic analysis. As the +simultaneous fit of parent and the first metabolite is usually straightforward, +Step 1 (SFO for parent only) is skipped here. We start with the model 2a, +with formation and decline of metabolite Z1 and the pathway from parent +directly to sink included (default in mkin). + +```{r FOCUS_2006_Z_fits_1, echo=TRUE, fig.height=6} +Z.2a <- mkinmod(Z0 = mkinsub("SFO", "Z1"), + Z1 = mkinsub("SFO")) +m.Z.2a <- mkinfit(Z.2a, FOCUS_2006_Z_mkin, quiet = TRUE) +plot_sep(m.Z.2a) +summary(m.Z.2a, data = FALSE)$bpar +``` + +As obvious from the parameter summary (the \texttt{bpar} component of the +summary), the kinetic rate constant from parent compound Z to sink +is very small and the t-test for this parameter suggests that it is +not significantly different from zero. This suggests, in agreement with the +analysis in the FOCUS kinetics report, to simplify the model by removing the +pathway to sink. + +A similar result can be obtained when formation fractions are used in the model +formulation: + +```{r FOCUS_2006_Z_fits_2, echo=TRUE, fig.height=6} +Z.2a.ff <- mkinmod(Z0 = mkinsub("SFO", "Z1"), + Z1 = mkinsub("SFO"), + use_of_ff = "max") + +m.Z.2a.ff <- mkinfit(Z.2a.ff, FOCUS_2006_Z_mkin, quiet = TRUE) +plot_sep(m.Z.2a.ff) +summary(m.Z.2a.ff, data = FALSE)$bpar +``` + +Here, the ilr transformed formation fraction fitted in the model takes a very +large value, and the backtransformed formation fraction from parent Z to Z1 is +practically unity. Here, the covariance matrix used for the calculation +of confidence intervals is not returned as the model is +overparameterised. + +A simplified model is obtained by removing the pathway to the sink. +\footnote{If the model formulation without formation fractions +is used, the same effect can be obtained by fixing the parameter \texttt{k\_Z\_sink} +to a value of zero.} + +In the following, we use the parameterisation with formation fractions in order +to be able to compare with the results in the FOCUS guidance, and as it +makes it easier to use parameters obtained in a previous fit when adding a further +metabolite. + +```{r FOCUS_2006_Z_fits_3, echo=TRUE, fig.height=6} +Z.3 <- mkinmod(Z0 = mkinsub("SFO", "Z1", sink = FALSE), + Z1 = mkinsub("SFO"), use_of_ff = "max") +m.Z.3 <- mkinfit(Z.3, FOCUS_2006_Z_mkin, quiet = TRUE) +plot_sep(m.Z.3) +summary(m.Z.3, data = FALSE)$bpar +``` + +As there is only one transformation product for Z0 and no pathway +to sink, the formation fraction is internally fixed to unity. + +# Metabolites Z2 and Z3 + +As suggested in the FOCUS report, the pathway to sink was removed for metabolite Z1 as +well in the next step. While this step appears questionable on the basis of the above results, it +is followed here for the purpose of comparison. Also, in the FOCUS report, it is +assumed that there is additional empirical evidence that Z1 quickly and exclusively +hydrolyses to Z2. + +```{r FOCUS_2006_Z_fits_5, echo=TRUE, fig.height=7} +Z.5 <- mkinmod(Z0 = mkinsub("SFO", "Z1", sink = FALSE), + Z1 = mkinsub("SFO", "Z2", sink = FALSE), + Z2 = mkinsub("SFO"), use_of_ff = "max") +m.Z.5 <- mkinfit(Z.5, FOCUS_2006_Z_mkin, quiet = TRUE) +plot_sep(m.Z.5) +``` + +Finally, metabolite Z3 is added to the model. We use the optimised +differential equation parameter values from the previous fit in order to +accelerate the optimization. + +```{r FOCUS_2006_Z_fits_6, echo=TRUE, fig.height=8} +Z.FOCUS <- mkinmod(Z0 = mkinsub("SFO", "Z1", sink = FALSE), + Z1 = mkinsub("SFO", "Z2", sink = FALSE), + Z2 = mkinsub("SFO", "Z3"), + Z3 = mkinsub("SFO"), + use_of_ff = "max") +m.Z.FOCUS <- mkinfit(Z.FOCUS, FOCUS_2006_Z_mkin, + parms.ini = m.Z.5$bparms.ode, + quiet = TRUE) +plot_sep(m.Z.FOCUS) +summary(m.Z.FOCUS, data = FALSE)$bpar +endpoints(m.Z.FOCUS) +``` + +This fit corresponds to the final result chosen in Appendix 7 of the FOCUS +report. Confidence intervals returned by mkin are based on internally +transformed parameters, however. + +# Using the SFORB model + +As the FOCUS report states, there is a certain tailing of the time course of metabolite +Z3. Also, the time course of the parent compound is not fitted very well using the +SFO model, as residues at a certain low level remain. + +Therefore, an additional model is offered here, using the single first-order +reversible binding (SFORB) model for metabolite Z3. As expected, the $\chi^2$ +error level is lower for metabolite Z3 using this model and the graphical +fit for Z3 is improved. However, the covariance matrix is not returned. + +```{r FOCUS_2006_Z_fits_7, echo=TRUE, fig.height=8} +Z.mkin.1 <- mkinmod(Z0 = mkinsub("SFO", "Z1", sink = FALSE), + Z1 = mkinsub("SFO", "Z2", sink = FALSE), + Z2 = mkinsub("SFO", "Z3"), + Z3 = mkinsub("SFORB")) +m.Z.mkin.1 <- mkinfit(Z.mkin.1, FOCUS_2006_Z_mkin, quiet = TRUE) +plot_sep(m.Z.mkin.1) +summary(m.Z.mkin.1, data = FALSE)$cov.unscaled +``` + +Therefore, a further stepwise model building is performed starting from the +stage of parent and two metabolites, starting from the assumption that the model +fit for the parent compound can be improved by using the SFORB model. + +```{r FOCUS_2006_Z_fits_9, echo=TRUE, fig.height=8} +Z.mkin.3 <- mkinmod(Z0 = mkinsub("SFORB", "Z1", sink = FALSE), + Z1 = mkinsub("SFO", "Z2", sink = FALSE), + Z2 = mkinsub("SFO")) +m.Z.mkin.3 <- mkinfit(Z.mkin.3, FOCUS_2006_Z_mkin, quiet = TRUE) +plot_sep(m.Z.mkin.3) +``` + +This results in a much better representation of the behaviour of the parent +compound Z0. + +Finally, Z3 is added as well. These models appear overparameterised (no +covariance matrix returned) if the sink for Z1 is left in the models. + +```{r FOCUS_2006_Z_fits_10, echo=TRUE, fig.height=8} +Z.mkin.4 <- mkinmod(Z0 = mkinsub("SFORB", "Z1", sink = FALSE), + Z1 = mkinsub("SFO", "Z2", sink = FALSE), + Z2 = mkinsub("SFO", "Z3"), + Z3 = mkinsub("SFO")) +m.Z.mkin.4 <- mkinfit(Z.mkin.4, FOCUS_2006_Z_mkin, + parms.ini = m.Z.mkin.3$bparms.ode, + quiet = TRUE) +plot_sep(m.Z.mkin.4) +``` + +The error level of the fit, but especially of metabolite Z3, can be improved if +the SFORB model is chosen for this metabolite, as this model is capable of +representing the tailing of the metabolite decline phase. + +```{r FOCUS_2006_Z_fits_11, echo=TRUE, fig.height=8} +Z.mkin.5 <- mkinmod(Z0 = mkinsub("SFORB", "Z1", sink = FALSE), + Z1 = mkinsub("SFO", "Z2", sink = FALSE), + Z2 = mkinsub("SFO", "Z3"), + Z3 = mkinsub("SFORB")) +m.Z.mkin.5 <- mkinfit(Z.mkin.5, FOCUS_2006_Z_mkin, + parms.ini = m.Z.mkin.4$bparms.ode[1:4], + quiet = TRUE) +plot_sep(m.Z.mkin.5) +``` + +The summary view of the backtransformed parameters shows that we get no +confidence intervals due to overparameterisation. As the optimized +\texttt{k\_Z3\_bound\_free} is excessively small, it seems reasonable to fix it to +zero. + +```{r FOCUS_2006_Z_fits_11a, echo=TRUE} +m.Z.mkin.5a <- mkinfit(Z.mkin.5, FOCUS_2006_Z_mkin, + parms.ini = c(m.Z.mkin.5$bparms.ode[1:7], + k_Z3_bound_free = 0), + fixed_parms = "k_Z3_bound_free", + quiet = TRUE) +plot_sep(m.Z.mkin.5a) +``` + +As expected, the residual plots for Z0 and Z3 are more random than in the case of the +all SFO model for which they were shown above. In conclusion, the model +\texttt{Z.mkin.5a} is proposed as the best-fit model for the dataset from +Appendix 7 of the FOCUS report. + +A graphical representation of the confidence intervals can finally be obtained. + +```{r FOCUS_2006_Z_fits_11b, echo=TRUE} +mkinparplot(m.Z.mkin.5a) +``` + +The endpoints obtained with this model are + +```{r FOCUS_2006_Z_fits_11b_endpoints, echo=TRUE} +endpoints(m.Z.mkin.5a) +``` + +It is clear the degradation rate of Z3 towards the end of the experiment +is very low as DT50\_Z3\_b2 (the second Eigenvalue of the system of two differential +equations representing the SFORB system for Z3, corresponding to the slower rate +constant of the DFOP model) is reported to be infinity. However, this appears +to be a feature of the data. + + +# References + + diff --git a/vignettes/web_only/NAFTA_examples.Rmd b/vignettes/web_only/NAFTA_examples.Rmd deleted file mode 100644 index 26a9240a..00000000 --- a/vignettes/web_only/NAFTA_examples.Rmd +++ /dev/null @@ -1,243 +0,0 @@ ---- -title: "Evaluation of example datasets from Attachment 1 to the US EPA SOP for the NAFTA guidance" -author: "Johannes Ranke" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: - collapsed: false - mathjax: null - fig_retina: null -references: -- id: usepa2015 - title: Standard Operating Procedure for Using the NAFTA - Guidance to Calculate Representative Half-life Values and Characterizing - Pesticide Degradation - author: - - family: US EPA - type: report - issued: - year: 2015 - url: https://www.epa.gov/pesticide-science-and-assessing-pesticide-risks/standard-operating-procedure-using-nafta-guidance -vignette: > - %\VignetteIndexEntry{Example evaluation of FOCUS Laboratory Data L1 to L3} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -# Introduction - -In this document, the example evaluations provided in Attachment 1 to the SOP -of US EPA for using the NAFTA guidance [@usepa2015] are repeated using mkin. -The original evaluations reported in the attachment were performed using PestDF -in version 0.8.4. Note that PestDF 0.8.13 is the version distributed at the US -EPA website today (2019-02-26). - -The datasets are now distributed with the mkin package. - -```{r, include = FALSE} -library(knitr) -opts_chunk$set(tidy = FALSE, cache = FALSE, fig.height = 7) -library("mkin", quietly = TRUE) -``` - -# Examples where DFOP did not converge with PestDF 0.8.4 - -In attachment 1, it is reported that the DFOP model does not converge for these -datasets when PestDF 0.8.4 was used. For all four datasets, the DFOP model can -be fitted with mkin (see below). The negative half-life given by PestDF 0.8.4 -for these fits appears to be the result of a bug. The results for the other -two models (SFO and IORE) are the same. - -## Example on page 5, upper panel - -```{r p5a} -p5a <- nafta(NAFTA_SOP_Attachment[["p5a"]]) -plot(p5a) -print(p5a) -``` - -## Example on page 5, lower panel - -```{r p5b} -p5b <- nafta(NAFTA_SOP_Attachment[["p5b"]]) -plot(p5b) -print(p5b) -``` - -## Example on page 6 - -```{r p6} -p6 <- nafta(NAFTA_SOP_Attachment[["p6"]]) -plot(p6) -print(p6) -``` - -## Example on page 7 - -```{r p7} -p7 <- nafta(NAFTA_SOP_Attachment[["p7"]]) -plot(p7) -print(p7) -``` - -# Examples where the representative half-life deviates from the observed DT50 - -## Example on page 8 - -For this dataset, the IORE fit does not converge when the default starting values -used by mkin for the IORE model are used. Therefore, a lower value for the rate -constant is used here. - -```{r p8} -p8 <- nafta(NAFTA_SOP_Attachment[["p8"]], parms.ini = c(k__iore_parent_sink = 1e-3)) -plot(p8) -print(p8) -``` - -# Examples where SFO was not selected for an abiotic study - -## Example on page 9, upper panel - -```{r p9a} -p9a <- nafta(NAFTA_SOP_Attachment[["p9a"]]) -plot(p9a) -print(p9a) -``` - -In this example, the residuals of the SFO indicate a lack of fit of this model, -so even if it was an abiotic experiment, the data do not suggest a simple -exponential decline. - -## Example on page 9, lower panel - -```{r p9b} -p9b <- nafta(NAFTA_SOP_Attachment[["p9b"]]) -plot(p9b) -print(p9b) -``` - -Here, mkin gives a longer slow DT50 for the DFOP model (17.8 days) than -PestDF (13.5 days). Presumably, this is related to the fact that -PestDF gives a negative value for the proportion of the fast degradation -which should be between 0 and 1, inclusive. This parameter is called -f in PestDF and g in mkin. In mkin, it is restricted to the interval from -0 to 1. - -## Example on page 10 - -```{r p10} -p10 <- nafta(NAFTA_SOP_Attachment[["p10"]]) -plot(p10) -print(p10) -``` - -Here, a value below N is given for the IORE model, because the data -suggests a faster decline towards the end of the experiment, which -appears physically rather unlikely in the case of a photolysis study. -It seems PestDF does not constrain N to values above zero, thus -the slight difference in IORE model parameters between PestDF and -mkin. - -# The DT50 was not observed during the study - -## Example on page 11 - -```{r p11} -p11 <- nafta(NAFTA_SOP_Attachment[["p11"]]) -plot(p11) -print(p11) -``` - -In this case, the DFOP fit reported for PestDF resulted -in a negative value for the slower rate constant, which is -not possible in mkin. The other results are in agreement. - -# N is less than 1 and the DFOP rate constants are like the SFO rate constant - -In the following three examples, the same results are obtained with mkin as -reported for PestDF. As in the case on page 10, the N values below 1 are deemed -unrealistic and appear to be the result of an overparameterisation. - - -## Example on page 12, upper panel - -```{r p12a} -p12a <- nafta(NAFTA_SOP_Attachment[["p12a"]]) -plot(p12a) -print(p12a) -``` - -## Example on page 12, lower panel - -```{r p12b} -p12b <- nafta(NAFTA_SOP_Attachment[["p12b"]]) -plot(p12b) -print(p12b) -``` - -## Example on page 13 - -```{r p13} -p13 <- nafta(NAFTA_SOP_Attachment[["p13"]]) -plot(p13) -print(p13) -``` - -# DT50 not observed in the study and DFOP problems in PestDF - -```{r p14} -p14 <- nafta(NAFTA_SOP_Attachment[["p14"]]) -plot(p14) -print(p14) -``` - -The slower rate constant reported by PestDF is negative, which is not -physically realistic, and not possible in mkin. The other fits give the same -results in mkin and PestDF. - -# N is less than 1 and DFOP fraction parameter is below zero - -```{r p15a} -p15a <- nafta(NAFTA_SOP_Attachment[["p15a"]]) -plot(p15a) -print(p15a) -``` - -```{r p15b} -p15b <- nafta(NAFTA_SOP_Attachment[["p15b"]]) -plot(p15b) -print(p15b) -``` - -In mkin, only the IORE fit is affected (deemed unrealistic), as the fraction -parameter of the DFOP model is restricted to the interval between 0 and 1 in -mkin. The SFO fits give the same results for both mkin and PestDF. - -# The DFOP fraction parameter is greater than 1 - -```{r p16} -p16 <- nafta(NAFTA_SOP_Attachment[["p16"]]) -plot(p16) -print(p16) -``` - -In PestDF, the DFOP fit seems to have stuck in a local minimum, as mkin finds -a solution with a much lower $\chi^2$ error level. As the half-life from the -slower rate constant of the DFOP model is larger than the IORE derived half-life, -the NAFTA recommendation obtained with mkin is to use the DFOP representative -half-life of 8.9 days. - -# Conclusions - -The results obtained with mkin deviate from the results obtained with PestDF -either in cases where one of the interpretive rules would apply, i.e. the -IORE parameter N is less than one or the DFOP k values obtained with PestDF are -equal to the SFO k values, or in cases where the DFOP model did not converge, -which often lead to negative rate constants returned by PestDF. - -Therefore, mkin appears to suitable for kinetic evaluations according to the -NAFTA guidance. - -# References diff --git a/vignettes/web_only/NAFTA_examples.rmd b/vignettes/web_only/NAFTA_examples.rmd new file mode 100644 index 00000000..26a9240a --- /dev/null +++ b/vignettes/web_only/NAFTA_examples.rmd @@ -0,0 +1,243 @@ +--- +title: "Evaluation of example datasets from Attachment 1 to the US EPA SOP for the NAFTA guidance" +author: "Johannes Ranke" +date: "`r Sys.Date()`" +output: + html_document: + toc: true + toc_float: + collapsed: false + mathjax: null + fig_retina: null +references: +- id: usepa2015 + title: Standard Operating Procedure for Using the NAFTA + Guidance to Calculate Representative Half-life Values and Characterizing + Pesticide Degradation + author: + - family: US EPA + type: report + issued: + year: 2015 + url: https://www.epa.gov/pesticide-science-and-assessing-pesticide-risks/standard-operating-procedure-using-nafta-guidance +vignette: > + %\VignetteIndexEntry{Example evaluation of FOCUS Laboratory Data L1 to L3} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +# Introduction + +In this document, the example evaluations provided in Attachment 1 to the SOP +of US EPA for using the NAFTA guidance [@usepa2015] are repeated using mkin. +The original evaluations reported in the attachment were performed using PestDF +in version 0.8.4. Note that PestDF 0.8.13 is the version distributed at the US +EPA website today (2019-02-26). + +The datasets are now distributed with the mkin package. + +```{r, include = FALSE} +library(knitr) +opts_chunk$set(tidy = FALSE, cache = FALSE, fig.height = 7) +library("mkin", quietly = TRUE) +``` + +# Examples where DFOP did not converge with PestDF 0.8.4 + +In attachment 1, it is reported that the DFOP model does not converge for these +datasets when PestDF 0.8.4 was used. For all four datasets, the DFOP model can +be fitted with mkin (see below). The negative half-life given by PestDF 0.8.4 +for these fits appears to be the result of a bug. The results for the other +two models (SFO and IORE) are the same. + +## Example on page 5, upper panel + +```{r p5a} +p5a <- nafta(NAFTA_SOP_Attachment[["p5a"]]) +plot(p5a) +print(p5a) +``` + +## Example on page 5, lower panel + +```{r p5b} +p5b <- nafta(NAFTA_SOP_Attachment[["p5b"]]) +plot(p5b) +print(p5b) +``` + +## Example on page 6 + +```{r p6} +p6 <- nafta(NAFTA_SOP_Attachment[["p6"]]) +plot(p6) +print(p6) +``` + +## Example on page 7 + +```{r p7} +p7 <- nafta(NAFTA_SOP_Attachment[["p7"]]) +plot(p7) +print(p7) +``` + +# Examples where the representative half-life deviates from the observed DT50 + +## Example on page 8 + +For this dataset, the IORE fit does not converge when the default starting values +used by mkin for the IORE model are used. Therefore, a lower value for the rate +constant is used here. + +```{r p8} +p8 <- nafta(NAFTA_SOP_Attachment[["p8"]], parms.ini = c(k__iore_parent_sink = 1e-3)) +plot(p8) +print(p8) +``` + +# Examples where SFO was not selected for an abiotic study + +## Example on page 9, upper panel + +```{r p9a} +p9a <- nafta(NAFTA_SOP_Attachment[["p9a"]]) +plot(p9a) +print(p9a) +``` + +In this example, the residuals of the SFO indicate a lack of fit of this model, +so even if it was an abiotic experiment, the data do not suggest a simple +exponential decline. + +## Example on page 9, lower panel + +```{r p9b} +p9b <- nafta(NAFTA_SOP_Attachment[["p9b"]]) +plot(p9b) +print(p9b) +``` + +Here, mkin gives a longer slow DT50 for the DFOP model (17.8 days) than +PestDF (13.5 days). Presumably, this is related to the fact that +PestDF gives a negative value for the proportion of the fast degradation +which should be between 0 and 1, inclusive. This parameter is called +f in PestDF and g in mkin. In mkin, it is restricted to the interval from +0 to 1. + +## Example on page 10 + +```{r p10} +p10 <- nafta(NAFTA_SOP_Attachment[["p10"]]) +plot(p10) +print(p10) +``` + +Here, a value below N is given for the IORE model, because the data +suggests a faster decline towards the end of the experiment, which +appears physically rather unlikely in the case of a photolysis study. +It seems PestDF does not constrain N to values above zero, thus +the slight difference in IORE model parameters between PestDF and +mkin. + +# The DT50 was not observed during the study + +## Example on page 11 + +```{r p11} +p11 <- nafta(NAFTA_SOP_Attachment[["p11"]]) +plot(p11) +print(p11) +``` + +In this case, the DFOP fit reported for PestDF resulted +in a negative value for the slower rate constant, which is +not possible in mkin. The other results are in agreement. + +# N is less than 1 and the DFOP rate constants are like the SFO rate constant + +In the following three examples, the same results are obtained with mkin as +reported for PestDF. As in the case on page 10, the N values below 1 are deemed +unrealistic and appear to be the result of an overparameterisation. + + +## Example on page 12, upper panel + +```{r p12a} +p12a <- nafta(NAFTA_SOP_Attachment[["p12a"]]) +plot(p12a) +print(p12a) +``` + +## Example on page 12, lower panel + +```{r p12b} +p12b <- nafta(NAFTA_SOP_Attachment[["p12b"]]) +plot(p12b) +print(p12b) +``` + +## Example on page 13 + +```{r p13} +p13 <- nafta(NAFTA_SOP_Attachment[["p13"]]) +plot(p13) +print(p13) +``` + +# DT50 not observed in the study and DFOP problems in PestDF + +```{r p14} +p14 <- nafta(NAFTA_SOP_Attachment[["p14"]]) +plot(p14) +print(p14) +``` + +The slower rate constant reported by PestDF is negative, which is not +physically realistic, and not possible in mkin. The other fits give the same +results in mkin and PestDF. + +# N is less than 1 and DFOP fraction parameter is below zero + +```{r p15a} +p15a <- nafta(NAFTA_SOP_Attachment[["p15a"]]) +plot(p15a) +print(p15a) +``` + +```{r p15b} +p15b <- nafta(NAFTA_SOP_Attachment[["p15b"]]) +plot(p15b) +print(p15b) +``` + +In mkin, only the IORE fit is affected (deemed unrealistic), as the fraction +parameter of the DFOP model is restricted to the interval between 0 and 1 in +mkin. The SFO fits give the same results for both mkin and PestDF. + +# The DFOP fraction parameter is greater than 1 + +```{r p16} +p16 <- nafta(NAFTA_SOP_Attachment[["p16"]]) +plot(p16) +print(p16) +``` + +In PestDF, the DFOP fit seems to have stuck in a local minimum, as mkin finds +a solution with a much lower $\chi^2$ error level. As the half-life from the +slower rate constant of the DFOP model is larger than the IORE derived half-life, +the NAFTA recommendation obtained with mkin is to use the DFOP representative +half-life of 8.9 days. + +# Conclusions + +The results obtained with mkin deviate from the results obtained with PestDF +either in cases where one of the interpretive rules would apply, i.e. the +IORE parameter N is less than one or the DFOP k values obtained with PestDF are +equal to the SFO k values, or in cases where the DFOP model did not converge, +which often lead to negative rate constants returned by PestDF. + +Therefore, mkin appears to suitable for kinetic evaluations according to the +NAFTA guidance. + +# References diff --git a/vignettes/web_only/benchmarks.Rmd b/vignettes/web_only/benchmarks.Rmd deleted file mode 100644 index 990c2fee..00000000 --- a/vignettes/web_only/benchmarks.Rmd +++ /dev/null @@ -1,150 +0,0 @@ ---- -title: "Benchmark timings for mkin" -author: "Johannes Ranke" -output: - html_document: - toc: true - toc_float: true - code_folding: show - fig_retina: null -date: "`r Sys.Date()`" -vignette: > - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -library(knitr) -opts_chunk$set(tidy = FALSE, cache = FALSE) -library("mkin") -``` - -Each system is characterized by its CPU type, the operating system type and the -mkin version. Currently only values for one system are available. - -```{r} -cpu_model <- benchmarkme::get_cpu()$model_name -operating_system <- Sys.info()[["sysname"]] -mkin_version <- as.character(packageVersion("mkin")) -system_string <- paste0(operating_system, ", ", cpu_model, ", mkin version ", mkin_version) -load("~/git/mkin/vignettes/web_only/mkin_benchmarks.rda") -mkin_benchmarks[system_string, c("CPU", "OS", "mkin")] <- - c(cpu_model, operating_system, mkin_version) - -if (mkin_version > "0.9.48.1") { - mmkin_bench <- function(models, datasets, error_model = "const") { - mmkin(models, datasets, error_model = error_model, cores = 1, quiet = TRUE) - } -} else { - mmkin_bench <- function(models, datasets, error_model = NULL) { - mmkin(models, datasets, reweight.method = error_model, cores = 1, quiet = TRUE) - } -} -``` - -## Test cases - -Parent only: - -```{r parent_only, warning = FALSE} -FOCUS_C <- FOCUS_2006_C -FOCUS_D <- subset(FOCUS_2006_D, value != 0) -parent_datasets <- list(FOCUS_C, FOCUS_D) - -t1 <- system.time(mmkin_bench(c("SFO", "FOMC", "DFOP", "HS"), parent_datasets))[["elapsed"]] -t2 <- system.time(mmkin_bench(c("SFO", "FOMC", "DFOP", "HS"), parent_datasets, - error_model = "tc"))[["elapsed"]] -``` - -One metabolite: - -```{r one_metabolite} -SFO_SFO <- mkinmod( - parent = mkinsub("SFO", "m1"), - m1 = mkinsub("SFO")) -FOMC_SFO <- mkinmod( - parent = mkinsub("FOMC", "m1"), - m1 = mkinsub("SFO")) -DFOP_SFO <- mkinmod( - parent = mkinsub("FOMC", "m1"), - m1 = mkinsub("SFO")) -t3 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D)))[["elapsed"]] -t4 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D), - error_model = "tc"))[["elapsed"]] -t5 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D), - error_model = "obs"))[["elapsed"]] -``` - -Two metabolites, synthetic data: - -```{r two_metabolites} -m_synth_SFO_lin <- mkinmod(parent = mkinsub("SFO", "M1"), - M1 = mkinsub("SFO", "M2"), - M2 = mkinsub("SFO"), - use_of_ff = "max", quiet = TRUE) - -m_synth_DFOP_par <- mkinmod(parent = mkinsub("DFOP", c("M1", "M2")), - M1 = mkinsub("SFO"), - M2 = mkinsub("SFO"), - use_of_ff = "max", quiet = TRUE) - -SFO_lin_a <- synthetic_data_for_UBA_2014[[1]]$data - -DFOP_par_c <- synthetic_data_for_UBA_2014[[12]]$data - -t6 <- system.time(mmkin_bench(list(m_synth_SFO_lin), list(SFO_lin_a)))[["elapsed"]] -t7 <- system.time(mmkin_bench(list(m_synth_DFOP_par), list(DFOP_par_c)))[["elapsed"]] - -t8 <- system.time(mmkin_bench(list(m_synth_SFO_lin), list(SFO_lin_a), - error_model = "tc"))[["elapsed"]] -t9 <- system.time(mmkin_bench(list(m_synth_DFOP_par), list(DFOP_par_c), - error_model = "tc"))[["elapsed"]] - -t10 <- system.time(mmkin_bench(list(m_synth_SFO_lin), list(SFO_lin_a), - error_model = "obs"))[["elapsed"]] -t11 <- system.time(mmkin_bench(list(m_synth_DFOP_par), list(DFOP_par_c), - error_model = "obs"))[["elapsed"]] -``` - -```{r results} -mkin_benchmarks[system_string, paste0("t", 1:11)] <- - c(t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) -save(mkin_benchmarks, file = "~/git/mkin/vignettes/web_only/mkin_benchmarks.rda") -``` - -## Results - -Currently, we only have benchmark information on one system, therefore only the mkin -version is shown with the results below. Timings are in seconds, shorter is better. - -```{r} -rownames(mkin_benchmarks) <- as.character(mkin_benchmarks$mkin) -``` - - -Benchmarks for all available error models are shown. - -### Parent only - -Constant variance and two-component error model: - -```{r} -kable(mkin_benchmarks[, c("t1", "t2")]) -``` - -### One metabolite - -Constant variance, variance by variable and two-component error model: - -```{r} -kable(mkin_benchmarks[, c("t3", "t4", "t5")]) -``` - -### Two metabolites - -Two different datasets, for each constant variance, variance by variable and -two-component error model are shown: - -```{r} -kable(mkin_benchmarks[, paste0("t", 6:11)]) -``` diff --git a/vignettes/web_only/benchmarks.html b/vignettes/web_only/benchmarks.html index 821399e4..043b777f 100644 --- a/vignettes/web_only/benchmarks.html +++ b/vignettes/web_only/benchmarks.html @@ -11,7 +11,7 @@ - + Benchmark timings for mkin @@ -1583,29 +1583,12 @@ div.tocify {

Benchmark timings for mkin

Johannes Ranke

-

2020-05-12

+

2020-05-13

-

Each system is characterized by its CPU type, the operating system type and the mkin version. Currently only values for one system are available.

-
cpu_model <- benchmarkme::get_cpu()$model_name
-operating_system <- Sys.info()[["sysname"]]
-mkin_version <- as.character(packageVersion("mkin"))
-system_string <- paste0(operating_system, ", ", cpu_model, ", mkin version ", mkin_version)
-load("~/git/mkin/vignettes/web_only/mkin_benchmarks.rda")
-mkin_benchmarks[system_string, c("CPU", "OS", "mkin")] <-
-  c(cpu_model, operating_system, mkin_version)
-
-if (mkin_version > "0.9.48.1") {
-  mmkin_bench <- function(models, datasets, error_model = "const") {
-    mmkin(models, datasets, error_model = error_model, cores = 1, quiet = TRUE)
-  }
-} else {
-  mmkin_bench <- function(models, datasets, error_model = NULL) {
-    mmkin(models, datasets, reweight.method = error_model, cores = 1, quiet = TRUE)
-  }
-}
+

Each system is characterized by its CPU type, the operating system type and the mkin version. Currently only values for one system are available. A compiler was available, so if no analytical solution was available, compiled ODE models are used.

Test cases

Parent only:

@@ -1619,17 +1602,14 @@ t2 <- system.time(mmkin_bench(c("SFO", "FOMC", "DFOP

One metabolite:

SFO_SFO <- mkinmod(
   parent = mkinsub("SFO", "m1"),
-  m1 = mkinsub("SFO"))
-
## Successfully compiled differential equation model from auto-generated C code.
-
FOMC_SFO <- mkinmod(
+  m1 = mkinsub("SFO"))
+FOMC_SFO <- mkinmod(
   parent = mkinsub("FOMC", "m1"),
-  m1 = mkinsub("SFO"))
-
## Successfully compiled differential equation model from auto-generated C code.
-
DFOP_SFO <- mkinmod(
+  m1 = mkinsub("SFO"))
+DFOP_SFO <- mkinmod(
   parent = mkinsub("FOMC", "m1"),
-  m1 = mkinsub("SFO"))
-
## Successfully compiled differential equation model from auto-generated C code.
-
t3 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D)))[["elapsed"]]
+  m1 = mkinsub("SFO"))
+t3 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D)))[["elapsed"]]
 t4 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D),
     error_model = "tc"))[["elapsed"]]
 t5 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D),
@@ -1667,127 +1647,123 @@ save(mkin_benchmarks, file = "~/git/mkin/vignettes/web_only/mkin_benchmarks
 

Results

-

Currently, we only have benchmark information on one system, therefore only the mkin version is shown with the results below. Timings are in seconds, shorter is better.

-
rownames(mkin_benchmarks) <- as.character(mkin_benchmarks$mkin)
+

Currently, we only have benchmark information on one system, therefore only the mkin version is shown with the results below. Timings are in seconds, shorter is better. All results were obtained by serial, i.e. not using multiple computing cores.

Benchmarks for all available error models are shown.

Parent only

-

Constant variance and two-component error model:

-
kable(mkin_benchmarks[, c("t1", "t2")])
+

Constant variance (t1) and two-component error model (t2) for four models fitted to two datasets, i.e. eight fits for each test.

- - - + + + - + - + - + - + - + - - - + + +
t1t2mkin versiont1 [s]t2 [s]
0.9.48.10.9.48.1 3.610 11.019
0.9.49.10.9.49.1 8.184 22.889
0.9.49.20.9.49.2 7.064 12.558
0.9.49.30.9.49.3 7.296 21.239
0.9.49.40.9.49.4 5.936 20.545
0.9.50.21.5593.9290.9.50.21.5473.928

One metabolite

-

Constant variance, variance by variable and two-component error model:

-
kable(mkin_benchmarks[, c("t3", "t4", "t5")])
+

Constant variance (t3), two-component error model (t4), and variance by variable (t5) for three models fitted to one dataset, i.e. three fits for each test.

- - - - + + + + - + - + - + - + - + - - - - + + + +
t3t4t5mkin versiont3 [s]t4 [s]t5 [s]
0.9.48.10.9.48.1 3.764 14.347 9.495
0.9.49.10.9.49.1 4.649 13.789 6.395
0.9.49.20.9.49.2 4.786 8.461 5.675
0.9.49.30.9.49.3 4.510 13.805 7.386
0.9.49.40.9.49.4 4.446 15.335 6.002
0.9.50.21.3526.1102.8410.9.50.21.3716.1542.720

Two metabolites

-

Two different datasets, for each constant variance, variance by variable and two-component error model are shown:

-
kable(mkin_benchmarks[, paste0("t", 6:11)])
+

Constant variance (t6 and t7), two-component error model (t8 and t9), and variance by variable (t10 and t11) for one model fitted to one dataset, i.e. one fit for each test.

- - - - - - - + + + + + + + - + @@ -1796,7 +1772,7 @@ save(mkin_benchmarks, file = "~/git/mkin/vignettes/web_only/mkin_benchmarks - + @@ -1805,7 +1781,7 @@ save(mkin_benchmarks, file = "~/git/mkin/vignettes/web_only/mkin_benchmarks - + @@ -1814,7 +1790,7 @@ save(mkin_benchmarks, file = "~/git/mkin/vignettes/web_only/mkin_benchmarks - + @@ -1823,7 +1799,7 @@ save(mkin_benchmarks, file = "~/git/mkin/vignettes/web_only/mkin_benchmarks - + @@ -1832,13 +1808,13 @@ save(mkin_benchmarks, file = "~/git/mkin/vignettes/web_only/mkin_benchmarks - - - - - - - + + + + + + +
t6t7t8t9t10t11mkin versiont6 [s]t7 [s]t8 [s]t9 [s]t10 [s]t11 [s]
0.9.48.10.9.48.1 2.623 4.587 7.52531.267
0.9.49.10.9.49.1 2.542 4.128 4.6325.636
0.9.49.20.9.49.2 2.723 4.478 4.8625.574
0.9.49.30.9.49.3 2.643 4.374 7.0207.365
0.9.49.40.9.49.4 2.635 4.259 4.7375.626
0.9.50.20.7591.2041.2752.8372.0262.9760.9.50.20.7421.2031.2822.8452.0372.987
diff --git a/vignettes/web_only/benchmarks.rmd b/vignettes/web_only/benchmarks.rmd new file mode 100644 index 00000000..7ae12451 --- /dev/null +++ b/vignettes/web_only/benchmarks.rmd @@ -0,0 +1,157 @@ +--- +title: "Benchmark timings for mkin" +author: "Johannes Ranke" +output: + html_document: + toc: true + toc_float: true + code_folding: show + fig_retina: null +date: "`r Sys.Date()`" +vignette: > + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +library(knitr) +opts_chunk$set(tidy = FALSE, cache = FALSE) +library("mkin") +``` + +Each system is characterized by its CPU type, the operating system type and the +mkin version. Currently only values for one system are available. A compiler +was available, so if no analytical solution was available, compiled ODE models are +used. + +```{r include = FALSE} +cpu_model <- benchmarkme::get_cpu()$model_name +operating_system <- Sys.info()[["sysname"]] +mkin_version <- as.character(packageVersion("mkin")) +system_string <- paste0(operating_system, ", ", cpu_model, ", mkin version ", mkin_version) +load("~/git/mkin/vignettes/web_only/mkin_benchmarks.rda") +mkin_benchmarks[system_string, c("CPU", "OS", "mkin")] <- + c(cpu_model, operating_system, mkin_version) + +if (mkin_version > "0.9.48.1") { + mmkin_bench <- function(models, datasets, error_model = "const") { + mmkin(models, datasets, error_model = error_model, cores = 1, quiet = TRUE) + } +} else { + mmkin_bench <- function(models, datasets, error_model = NULL) { + mmkin(models, datasets, reweight.method = error_model, cores = 1, quiet = TRUE) + } +} +``` + +## Test cases + +Parent only: + +```{r parent_only, warning = FALSE} +FOCUS_C <- FOCUS_2006_C +FOCUS_D <- subset(FOCUS_2006_D, value != 0) +parent_datasets <- list(FOCUS_C, FOCUS_D) + +t1 <- system.time(mmkin_bench(c("SFO", "FOMC", "DFOP", "HS"), parent_datasets))[["elapsed"]] +t2 <- system.time(mmkin_bench(c("SFO", "FOMC", "DFOP", "HS"), parent_datasets, + error_model = "tc"))[["elapsed"]] +``` + +One metabolite: + +```{r one_metabolite, message = FALSE} +SFO_SFO <- mkinmod( + parent = mkinsub("SFO", "m1"), + m1 = mkinsub("SFO")) +FOMC_SFO <- mkinmod( + parent = mkinsub("FOMC", "m1"), + m1 = mkinsub("SFO")) +DFOP_SFO <- mkinmod( + parent = mkinsub("FOMC", "m1"), + m1 = mkinsub("SFO")) +t3 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D)))[["elapsed"]] +t4 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D), + error_model = "tc"))[["elapsed"]] +t5 <- system.time(mmkin_bench(list(SFO_SFO, FOMC_SFO, DFOP_SFO), list(FOCUS_D), + error_model = "obs"))[["elapsed"]] +``` + +Two metabolites, synthetic data: + +```{r two_metabolites, message = FALSE} +m_synth_SFO_lin <- mkinmod(parent = mkinsub("SFO", "M1"), + M1 = mkinsub("SFO", "M2"), + M2 = mkinsub("SFO"), + use_of_ff = "max", quiet = TRUE) + +m_synth_DFOP_par <- mkinmod(parent = mkinsub("DFOP", c("M1", "M2")), + M1 = mkinsub("SFO"), + M2 = mkinsub("SFO"), + use_of_ff = "max", quiet = TRUE) + +SFO_lin_a <- synthetic_data_for_UBA_2014[[1]]$data + +DFOP_par_c <- synthetic_data_for_UBA_2014[[12]]$data + +t6 <- system.time(mmkin_bench(list(m_synth_SFO_lin), list(SFO_lin_a)))[["elapsed"]] +t7 <- system.time(mmkin_bench(list(m_synth_DFOP_par), list(DFOP_par_c)))[["elapsed"]] + +t8 <- system.time(mmkin_bench(list(m_synth_SFO_lin), list(SFO_lin_a), + error_model = "tc"))[["elapsed"]] +t9 <- system.time(mmkin_bench(list(m_synth_DFOP_par), list(DFOP_par_c), + error_model = "tc"))[["elapsed"]] + +t10 <- system.time(mmkin_bench(list(m_synth_SFO_lin), list(SFO_lin_a), + error_model = "obs"))[["elapsed"]] +t11 <- system.time(mmkin_bench(list(m_synth_DFOP_par), list(DFOP_par_c), + error_model = "obs"))[["elapsed"]] +``` + +```{r results} +mkin_benchmarks[system_string, paste0("t", 1:11)] <- + c(t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) +save(mkin_benchmarks, file = "~/git/mkin/vignettes/web_only/mkin_benchmarks.rda") +``` + +## Results + +Currently, we only have benchmark information on one system, therefore only the mkin +version is shown with the results below. Timings are in seconds, shorter is better. +All results were obtained by serial, i.e. not using multiple computing cores. + +```{r, include = FALSE} +dimnames(mkin_benchmarks) <- list(as.character(mkin_benchmarks$mkin), + c("CPU", "OS", "mkin version", paste0("t", 1:11, " [s]"))) +``` + + +Benchmarks for all available error models are shown. + +### Parent only + +Constant variance (t1) and two-component error model (t2) for four models +fitted to two datasets, i.e. eight fits for each test. + +```{r, echo = FALSE} +kable(mkin_benchmarks[, 4:5], rownames = "mkin version") +``` + +### One metabolite + +Constant variance (t3), two-component error model (t4), and variance by variable (t5) +for three models fitted to one dataset, i.e. three fits for each test. + +```{r, echo = FALSE} +kable(mkin_benchmarks[, 6:8], rownames = "mkin version") +``` + +### Two metabolites + +Constant variance (t6 and t7), two-component error model (t8 and t9), and +variance by variable (t10 and t11) for one model fitted to one dataset, i.e. +one fit for each test. + +```{r, echo = FALSE} +kable(mkin_benchmarks[, 9:14], rownames = "mkin version") +``` diff --git a/vignettes/web_only/compiled_models.Rmd b/vignettes/web_only/compiled_models.Rmd deleted file mode 100644 index f99ea808..00000000 --- a/vignettes/web_only/compiled_models.Rmd +++ /dev/null @@ -1,141 +0,0 @@ ---- -title: "Performance benefit by using compiled model definitions in mkin" -author: "Johannes Ranke" -output: - html_document: - toc: true - toc_float: true - code_folding: show - fig_retina: null -date: "`r Sys.Date()`" -vignette: > - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -library(knitr) -opts_chunk$set(tidy = FALSE, cache = FALSE) -``` - -## How to benefit from compiled models - -When using an mkin version equal to or greater than 0.9-36 and a C compiler is -available, you will see a message that the model is being compiled from -autogenerated C code when defining a model using mkinmod. Starting from -version 0.9.49.9, the `mkinmod()` function checks for presence of a compiler -using - -```{r check_gcc, eval = FALSE} -pkgbuild::has_compiler() -``` - -In previous versions, it used `Sys.which("gcc")` for this check. - -On Linux, you need to have the essential build tools like make and gcc or clang -installed. On Debian based linux distributions, these will be pulled in by installing -the build-essential package. - -On MacOS, which I do not use personally, I have had reports that a compiler is -available by default. - -On Windows, you need to install Rtools and have the path to its bin directory -in your PATH variable. You do not need to modify the PATH variable when -installing Rtools. Instead, I would recommend to put the line - -```{r Rprofile, eval = FALSE} -Sys.setenv(PATH = paste("C:/Rtools/bin", Sys.getenv("PATH"), sep=";")) -``` -into your .Rprofile startup file. This is just a text file with some R -code that is executed when your R session starts. It has to be named .Rprofile -and has to be located in your home directory, which will generally be your -Documents folder. You can check the location of the home directory used by R by -issuing - -```{r HOME, eval = FALSE} -Sys.getenv("HOME") -``` - -## Comparison with other solution methods - -First, we build a simple degradation model for a parent compound with one metabolite, -and we remove zero values from the dataset. - -```{r create_SFO_SFO} -library("mkin", quietly = TRUE) -SFO_SFO <- mkinmod( - parent = mkinsub("SFO", "m1"), - m1 = mkinsub("SFO")) -FOCUS_D <- subset(FOCUS_2006_D, value != 0) -``` - -We can compare the performance of the Eigenvalue based solution against the -compiled version and the R implementation of the differential equations using -the benchmark package. In the output of below code, the warnings about zero -being removed from the FOCUS D dataset are suppressed. Since mkin version -0.9.49.11, an analytical solution is also implemented, which is included -in the tests below. - -```{r benchmark_SFO_SFO, fig.height = 3, message = FALSE, warning = FALSE} -if (require(rbenchmark)) { - b.1 <- benchmark( - "deSolve, not compiled" = mkinfit(SFO_SFO, FOCUS_D, - solution_type = "deSolve", - use_compiled = FALSE, quiet = TRUE), - "Eigenvalue based" = mkinfit(SFO_SFO, FOCUS_D, - solution_type = "eigen", quiet = TRUE), - "deSolve, compiled" = mkinfit(SFO_SFO, FOCUS_D, - solution_type = "deSolve", quiet = TRUE), - "analytical" = mkinfit(SFO_SFO, FOCUS_D, - solution_type = "analytical", - use_compiled = FALSE, quiet = TRUE), - replications = 1, order = "relative", - columns = c("test", "replications", "relative", "elapsed")) - print(b.1) -} else { - print("R package rbenchmark is not available") -} -``` - -We see that using the compiled model is by more than a factor of 10 faster -than using deSolve without compiled code. - -## Model without analytical solution - -This evaluation is also taken from the example section of mkinfit. No analytical -solution is available for this system, and now Eigenvalue based solution -is possible, so only deSolve using with or without compiled code is -available. - -```{r benchmark_FOMC_SFO, fig.height = 3, warning = FALSE} -if (require(rbenchmark)) { - FOMC_SFO <- mkinmod( - parent = mkinsub("FOMC", "m1"), - m1 = mkinsub( "SFO")) - - b.2 <- benchmark( - "deSolve, not compiled" = mkinfit(FOMC_SFO, FOCUS_D, - use_compiled = FALSE, quiet = TRUE), - "deSolve, compiled" = mkinfit(FOMC_SFO, FOCUS_D, quiet = TRUE), - replications = 1, order = "relative", - columns = c("test", "replications", "relative", "elapsed")) - print(b.2) - factor_FOMC_SFO <- round(b.2["1", "relative"]) -} else { - factor_FOMC_SFO <- NA - print("R package benchmark is not available") -} -``` - -Here we get a performance benefit of a factor of -`r factor_FOMC_SFO` -using the version of the differential equation model compiled from C code! - -This vignette was built with mkin `r utils::packageVersion("mkin")` on - -```{r sessionInfo, echo = FALSE} -cat(utils::capture.output(utils::sessionInfo())[1:3], sep = "\n") -if(!inherits(try(cpuinfo <- readLines("/proc/cpuinfo")), "try-error")) { - cat(gsub("model name\t: ", "CPU model: ", cpuinfo[grep("model name", cpuinfo)[1]])) -} -``` diff --git a/vignettes/web_only/compiled_models.rmd b/vignettes/web_only/compiled_models.rmd new file mode 100644 index 00000000..0b8c617a --- /dev/null +++ b/vignettes/web_only/compiled_models.rmd @@ -0,0 +1,141 @@ +--- +title: "Performance benefit by using compiled model definitions in mkin" +author: "Johannes Ranke" +output: + html_document: + toc: true + toc_float: true + code_folding: show + fig_retina: null +date: "`r Sys.Date()`" +vignette: > + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +library(knitr) +opts_chunk$set(tidy = FALSE, cache = FALSE) +``` + +## How to benefit from compiled models + +When using an mkin version equal to or greater than 0.9-36 and a C compiler is +available, you will see a message that the model is being compiled from +autogenerated C code when defining a model using mkinmod. Starting from +version 0.9.49.9, the `mkinmod()` function checks for presence of a compiler +using + +```{r check_gcc, eval = FALSE} +pkgbuild::has_compiler() +``` + +In previous versions, it used `Sys.which("gcc")` for this check. + +On Linux, you need to have the essential build tools like make and gcc or clang +installed. On Debian based linux distributions, these will be pulled in by installing +the build-essential package. + +On MacOS, which I do not use personally, I have had reports that a compiler is +available by default. + +On Windows, you need to install Rtools and have the path to its bin directory +in your PATH variable. You do not need to modify the PATH variable when +installing Rtools. Instead, I would recommend to put the line + +```{r Rprofile, eval = FALSE} +Sys.setenv(PATH = paste("C:/Rtools/bin", Sys.getenv("PATH"), sep=";")) +``` +into your .Rprofile startup file. This is just a text file with some R +code that is executed when your R session starts. It has to be named .Rprofile +and has to be located in your home directory, which will generally be your +Documents folder. You can check the location of the home directory used by R by +issuing + +```{r HOME, eval = FALSE} +Sys.getenv("HOME") +``` + +## Comparison with other solution methods + +First, we build a simple degradation model for a parent compound with one metabolite, +and we remove zero values from the dataset. + +```{r create_SFO_SFO} +library("mkin", quietly = TRUE) +SFO_SFO <- mkinmod( + parent = mkinsub("SFO", "m1"), + m1 = mkinsub("SFO")) +FOCUS_D <- subset(FOCUS_2006_D, value != 0) +``` + +We can compare the performance of the Eigenvalue based solution against the +compiled version and the R implementation of the differential equations using +the benchmark package. In the output of below code, the warnings about zero +being removed from the FOCUS D dataset are suppressed. Since mkin version +0.9.49.11, an analytical solution is also implemented, which is included +in the tests below. + +```{r benchmark_SFO_SFO, fig.height = 3, message = FALSE, warning = FALSE} +if (require(rbenchmark)) { + b.1 <- benchmark( + "deSolve, not compiled" = mkinfit(SFO_SFO, FOCUS_D, + solution_type = "deSolve", + use_compiled = FALSE, quiet = TRUE), + "Eigenvalue based" = mkinfit(SFO_SFO, FOCUS_D, + solution_type = "eigen", quiet = TRUE), + "deSolve, compiled" = mkinfit(SFO_SFO, FOCUS_D, + solution_type = "deSolve", quiet = TRUE), + "analytical" = mkinfit(SFO_SFO, FOCUS_D, + solution_type = "analytical", + use_compiled = FALSE, quiet = TRUE), + replications = 1, order = "relative", + columns = c("test", "replications", "relative", "elapsed")) + print(b.1) +} else { + print("R package rbenchmark is not available") +} +``` + +We see that using the compiled model is by more than a factor of 10 faster +than using deSolve without compiled code. + +## Model without analytical solution + +This evaluation is also taken from the example section of mkinfit. No analytical +solution is available for this system, and now Eigenvalue based solution +is possible, so only deSolve using with or without compiled code is +available. + +```{r benchmark_FOMC_SFO, fig.height = 3, warning = FALSE} +if (require(rbenchmark)) { + FOMC_SFO <- mkinmod( + parent = mkinsub("FOMC", "m1"), + m1 = mkinsub( "SFO")) + + b.2 <- benchmark( + "deSolve, not compiled" = mkinfit(FOMC_SFO, FOCUS_D, + use_compiled = FALSE, quiet = TRUE), + "deSolve, compiled" = mkinfit(FOMC_SFO, FOCUS_D, quiet = TRUE), + replications = 1, order = "relative", + columns = c("test", "replications", "relative", "elapsed")) + print(b.2) + factor_FOMC_SFO <- round(b.2["1", "relative"]) +} else { + factor_FOMC_SFO <- NA + print("R package benchmark is not available") +} +``` + +Here we get a performance benefit of a factor of +`r factor_FOMC_SFO` +using the version of the differential equation model compiled from C code! + +This vignette was built with mkin `r utils::packageVersion("mkin")` on + +```{r sessionInfo, echo = FALSE} +cat(utils::capture.output(utils::sessionInfo())[1:3], sep = "\n") +if(!inherits(try(cpuinfo <- readLines("/proc/cpuinfo")), "try-error")) { + cat(gsub("model name\t: ", "CPU model: ", cpuinfo[grep("model name", cpuinfo)[1]])) +} +``` diff --git a/vignettes/web_only/mkin_benchmarks.rda b/vignettes/web_only/mkin_benchmarks.rda index 268e5efe..c88ca4d6 100644 Binary files a/vignettes/web_only/mkin_benchmarks.rda and b/vignettes/web_only/mkin_benchmarks.rda differ -- cgit v1.2.1