From 24eb77216700cf8b2f2bde3abad84c1f83f9e32a Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Mon, 9 Jan 2023 06:22:04 +0100 Subject: Prebuilt PDF vignettes, summary_listing --- vignettes/prebuilt/2022_cyan_pathway.rmd | 536 +++++++++++++++++++++++++++++++ vignettes/prebuilt/2022_dmta_parent.rmd | 406 +++++++++++++++++++++++ vignettes/prebuilt/2022_dmta_pathway.rmd | 426 ++++++++++++++++++++++++ vignettes/prebuilt/references.bib | 187 +++++++++++ 4 files changed, 1555 insertions(+) create mode 100644 vignettes/prebuilt/2022_cyan_pathway.rmd create mode 100644 vignettes/prebuilt/2022_dmta_parent.rmd create mode 100644 vignettes/prebuilt/2022_dmta_pathway.rmd create mode 100644 vignettes/prebuilt/references.bib (limited to 'vignettes/prebuilt') diff --git a/vignettes/prebuilt/2022_cyan_pathway.rmd b/vignettes/prebuilt/2022_cyan_pathway.rmd new file mode 100644 index 00000000..df34a6f2 --- /dev/null +++ b/vignettes/prebuilt/2022_cyan_pathway.rmd @@ -0,0 +1,536 @@ +--- +title: "Testing hierarchical pathway kinetics with residue data on cyantraniliprole" +author: Johannes Ranke +date: Last change on 6 January 2023, last compiled on `r format(Sys.time(), "%e + %B %Y")` +output: + pdf_document: + extra_dependencies: ["float", "listing"] +toc: yes +geometry: margin=2cm +--- + +```{r setup, echo = FALSE, cache = FALSE} +options(width = 80) # For summary listings +knitr::opts_chunk$set( + comment = "", tidy = FALSE, cache = TRUE, fig.pos = "H", fig.align = "center" +) +``` + +\clearpage + +# Introduction + +The purpose of this document is to test demonstrate how nonlinear hierarchical +models (NLHM) based on the parent degradation models SFO, FOMC, DFOP and HS, +with serial formation of two or more metabolites can be fitted with the mkin +package. + +It was assembled in the course of work package 1.2 of Project Number 173340 +(Application of nonlinear hierarchical models to the kinetic evaluation of +chemical degradation data) of the German Environment Agency carried out in 2022 +and 2023. + +The mkin package is used in version `r packageVersion("mkin")` which is +currently under development. The newly introduced functionality that is +used here is a simplification of excluding random effects for a set of fits +based on a related set of fits with a reduced model, and the documentation of +the starting parameters of the fit, so that all starting parameters of `saem` +fits are now listed in the summary. The `saemix` package is used as a backend +for fitting the NLHM, but is also loaded to make the convergence plot function +available. + +This document is processed with the `knitr` package, which also provides the +`kable` function that is used to improve the display of tabular data in R +markdown documents. For parallel processing, the `parallel` package is used. + +```{r, packages, cache = FALSE, message = FALSE} +library(mkin) +library(knitr) +library(saemix) +library(parallel) +n_cores <- detectCores() +if (Sys.info()["sysname"] == "Windows") { + cl <- makePSOCKcluster(n_cores) +} else { + cl <- makeForkCluster(n_cores) +} +``` + +\clearpage + +## Test data + +The example data are taken from the final addendum to the DAR from 2014 +and are distributed with the mkin package. Residue data and time step +normalisation factors are read in using the function `read_spreadsheet` from +the mkin package. This function also performs the time step normalisation. + +```{r data} +data_file <- system.file( + "testdata", "cyantraniliprole_soil_efsa_2014.xlsx", + package = "mkin") +cyan_ds <- read_spreadsheet(data_file, parent_only = FALSE) +``` + +The following tables show the covariate data and the `r length(cyan_ds)` +datasets that were read in from the spreadsheet file. + +```{r show-covar-data, dependson = "data", results = "asis"} +pH <- attr(cyan_ds, "covariates") +kable(pH, caption = "Covariate data") +``` + +\clearpage + +```{r show-data, dependson = "data", results = "asis"} +for (ds_name in names(cyan_ds)) { + print( + kable(mkin_long_to_wide(cyan_ds[[ds_name]]), + caption = paste("Dataset", ds_name), + booktabs = TRUE, row.names = FALSE)) + cat("\n\\clearpage\n") +} +``` + +\clearpage + +# Parent only evaluations + +As the pathway fits have very long run times, evaluations of the parent data +are performed first, in order to determine for each hierarchical parent +degradation model which random effects on the degradation model parameters are +ill-defined. + +```{r parent-only, dependson = "data"} +cyan_sep_const <- mmkin(c("SFO", "FOMC", "DFOP", "SFORB", "HS"), + cyan_ds, quiet = TRUE, cores = n_cores) +cyan_sep_tc <- update(cyan_sep_const, error_model = "tc") +cyan_saem_full <- mhmkin(list(cyan_sep_const, cyan_sep_tc)) +status(cyan_saem_full) |> kable() +``` + +All fits converged successfully. + +```{r dependson = "parent-only"} +illparms(cyan_saem_full) |> kable() +``` +In almost all models, the random effect for the initial concentration of the +parent compound is ill-defined. For the biexponential models DFOP and SFORB, +the random effect of one additional parameter is ill-defined when the two-component +error model is used. + +```{r dependson = "parent-only"} +anova(cyan_saem_full) |> kable(digits = 1) +``` + +Model comparison based on AIC and BIC indicates that the two-component error model +is preferable for all parent models with the exception of DFOP. The lowest AIC +and BIC values are are obtained with the FOMC model, followed by SFORB and DFOP. + +```{r parent-only-reduced, dependson = "parent-only", include = FALSE} +cyan_saem_reduced <- mhmkin(list(cyan_sep_const, cyan_sep_tc), + no_random_effect = illparms(cyan_saem_full)) +illparms(cyan_saem_reduced) +anova(cyan_saem_reduced) |> kable(digits = 1) +``` + +# Pathway fits + +## Evaluations with pathway established previously + +To test the technical feasibility of coupling the relevant parent degradation +models with different transformation pathway models, a list of `mkinmod` models +is set up below. As in the EU evaluation, parallel formation of metabolites +JCZ38 and J9Z38 and secondary formation of metabolite JSE76 from JCZ38 is used. + +```{r, cyan-path-1} +if (!dir.exists("cyan_dlls")) dir.create("cyan_dlls") +cyan_path_1 <- list( + sfo_path_1 = mkinmod( + cyan = mkinsub("SFO", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO"), quiet = TRUE, + name = "sfo_path_1", dll_dir = "cyan_dlls", overwrite = TRUE), + fomc_path_1 = mkinmod( + cyan = mkinsub("FOMC", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO"), quiet = TRUE, + name = "fomc_path_1", dll_dir = "cyan_dlls", overwrite = TRUE), + dfop_path_1 = mkinmod( + cyan = mkinsub("DFOP", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO"), quiet = TRUE, + name = "dfop_path_1", dll_dir = "cyan_dlls", overwrite = TRUE), + sforb_path_1 = mkinmod( + cyan = mkinsub("SFORB", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO"), quiet = TRUE, + name = "sforb_path_1", dll_dir = "cyan_dlls", overwrite = TRUE), + hs_path_1 = mkinmod( + cyan = mkinsub("HS", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO"), quiet = TRUE, + name = "hs_path_1", dll_dir = "cyan_dlls", overwrite = TRUE) +) +``` +To obtain suitable starting values for the NLHM fits, separate pathway fits are +performed for all datasets. + +```{r, f-sep-1, dependson = c("data", "cyan_path_1")} +f_sep_1_const <- mmkin( + cyan_path_1, + cyan_ds, + error_model = "const", + cluster = cl, + quiet = TRUE) +status(f_sep_1_const) |> kable() + +f_sep_1_tc <- update(f_sep_1_const, error_model = "tc") +status(f_sep_1_tc) |> kable() +``` + +Most separate fits converged successfully. The biggest convergence +problems are seen when using the HS model with constant variance. + +For the hierarchical pathway fits, those random effects that could not be +quantified in the corresponding parent data analyses are excluded. + +In the code below, the output of the `illparms` function for the parent only +fits is used as an argument `no_random_effect` to the `mhmkin` function. +The possibility to do so was introduced in mkin version `1.2.2` which is +currently under development. + +```{r, f-saem-1, dependson = "f-sep-1"} +f_saem_1 <- mhmkin(list(f_sep_1_const, f_sep_1_tc), + no_random_effect = illparms(cyan_saem_full), + cluster = cl) +``` + +```{r dependson = "f-saem-1"} +status(f_saem_1) |> kable() +``` + +The status information from the individual fits shows that all fits completed +successfully. The matrix entries Fth and FO indicate that the Fisher +Information Matrix could not be inverted for the fixed effects (theta) +and the random effects (Omega), respectively. For the affected fits, +ill-defined parameters cannot be determined using the `illparms` function, +because it relies on the Fisher Information Matrix. + +```{r dependson = "f-saem-1"} +illparms(f_saem_1) |> kable() +``` + +The model comparison below suggests that the pathway fits using +DFOP or SFORB for the parent compound provide the best fit. + +```{r, dependson = "f-saem-1"} +anova(f_saem_1) |> kable(digits = 1) +``` + +For these two parent model, successful fits are shown below. Plots of the fits +with the other parent models are shown in the Appendix. + +```{r fig.cap = "DFOP pathway fit with two-component error", dependson = "f-saem-1", fig.height = 8} +plot(f_saem_1[["dfop_path_1", "tc"]]) +``` + +\clearpage + +```{r fig.cap = "SFORB pathway fit with two-component error", dependson = "f-saem-1", fig.height = 8} +plot(f_saem_1[["sforb_path_1", "tc"]]) +``` + +A closer graphical analysis of these Figures shows that the residues of +transformation product JCZ38 in the soils Tama and Nambsheim observed +at later time points are strongly and systematically underestimated. + +\clearpage + + +## Alternative pathway fits + +To improve the fit for JCZ38, a back-reaction from JSE76 to JCZ38 was +introduced in an alternative version of the transformation pathway, in analogy +to the back-reaction from K5A78 to K5A77. Both pairs of transformation products +are pairs of an organic acid with its corresponding amide (Addendum 2014, p. +109). As FOMC provided the best fit for the parent, and the biexponential +models DFOP and SFORB provided the best initial pathway fits, these three +parent models are used in the alternative pathway fits. + +```{r, f-sep-2-const, dependson = "data"} +cyan_path_2 <- list( + fomc_path_2 = mkinmod( + cyan = mkinsub("FOMC", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO", "JCZ38"), + name = "fomc_path_2", quiet = TRUE, + dll_dir = "cyan_dlls", + overwrite = TRUE + ), + dfop_path_2 = mkinmod( + cyan = mkinsub("DFOP", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO", "JCZ38"), + name = "dfop_path_2", quiet = TRUE, + dll_dir = "cyan_dlls", + overwrite = TRUE + ), + sforb_path_2 = mkinmod( + cyan = mkinsub("SFORB", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO", "JCZ38"), + name = "sforb_path_2", quiet = TRUE, + dll_dir = "cyan_dlls", + overwrite = TRUE + ) +) +f_sep_2_const <- mmkin( + cyan_path_2, + cyan_ds, + error_model = "const", + cluster = cl, + quiet = TRUE) + +status(f_sep_2_const) |> kable() +``` + +Using constant variance, separate fits converge with the exception +of the fits to the Sassafras soil data. + +```{r f-sep-2-tc, dependson = "f-sep-2-const"} +f_sep_2_tc <- update(f_sep_2_const, error_model = "tc") +status(f_sep_2_tc) |> kable() +``` + +Using the two-component error model, all separate fits converge with the +exception of the alternative pathway fit with DFOP used for the parent and the +Sassafras dataset. + +```{r f-saem-2, dependson = c("f-sep-2-const", "f-sep-2-tc")} +f_saem_2 <- mhmkin(list(f_sep_2_const, f_sep_2_tc), + no_random_effect = illparms(cyan_saem_full[2:4, ]), + cluster = cl) +``` + +```{r dependson = "f-saem-2"} +status(f_saem_2) |> kable() +``` + +The hierarchical fits for the alternative pathway completed successfully. + +```{r dependson = "f-saem-2"} +illparms(f_saem_2) |> kable() +``` + +In both fits, the random effects for the formation fractions for the +pathways from JCZ38 to JSE76, and for the reverse pathway from JSE76 +to JCZ38 are ill-defined. + +```{r dependson = "f-saem-2"} +anova(f_saem_2) |> kable(digits = 1) +``` + +The variants using the biexponential models DFOP and SFORB for the parent +compound and the two-component error model give the lowest AIC and BIC values +and are plotted below. Compared with the original pathway, the AIC and BIC +values indicate a large improvement. This is confirmed by the plots, which show +that the metabolite JCZ38 is fitted much better with this model. + +\clearpage + +```{r fig.cap = "FOMC pathway fit with two-component error, alternative pathway", dependson = "f-saem-2", fig.height = 8} +plot(f_saem_2[["fomc_path_2", "tc"]]) +``` +\clearpage + +```{r fig.cap = "DFOP pathway fit with two-component error, alternative pathway", dependson = "f-saem-2", fig.height = 8} +plot(f_saem_2[["dfop_path_2", "tc"]]) +``` + +\clearpage + +```{r fig.cap = "SFORB pathway fit with two-component error, alternative pathway", dependson = "f-saem-2", fig.height = 8} +plot(f_saem_2[["sforb_path_2", "tc"]]) +``` + +\clearpage + +## Refinement of alternative pathway fits + +All ill-defined random effects that were identified in the parent only fits and +in the above pathway fits, are excluded for the final evaluations below. +For this purpose, a list of character vectors is created below that can be indexed +by row and column indices, and which contains the degradation parameter names for which +random effects should be excluded for each of the hierarchical fits contained +in `f_saem_2`. + +```{r f-saem-3, dependson = "f-saem-2"} +no_ranef <- matrix(list(), nrow = 3, ncol = 2, dimnames = dimnames(f_saem_2)) +no_ranef[["fomc_path_2", "const"]] <- c("log_beta", "f_JCZ38_qlogis", "f_JSE76_qlogis") +no_ranef[["fomc_path_2", "tc"]] <- c("cyan_0", "f_JCZ38_qlogis", "f_JSE76_qlogis") +no_ranef[["dfop_path_2", "const"]] <- c("cyan_0", "f_JCZ38_qlogis", "f_JSE76_qlogis") +no_ranef[["dfop_path_2", "tc"]] <- c("cyan_0", "log_k1", "f_JCZ38_qlogis", "f_JSE76_qlogis") +no_ranef[["sforb_path_2", "const"]] <- c("cyan_free_0", + "f_JCZ38_qlogis", "f_JSE76_qlogis") +no_ranef[["sforb_path_2", "tc"]] <- c("cyan_free_0", "log_k_cyan_free_bound", + "f_JCZ38_qlogis", "f_JSE76_qlogis") +clusterExport(cl, "no_ranef") + +f_saem_3 <- update(f_saem_2, + no_random_effect = no_ranef, + cluster = cl) +``` + +```{r dependson = "f-saem-3"} +status(f_saem_3) |> kable() +``` + +With the exception of the FOMC pathway fit with constant variance, all updated +fits completed successfully. However, the Fisher Information Matrix for the +fixed effects (Fth) could not be inverted, so no confidence intervals for the +optimised parameters are available. + +```{r dependson = "f-saem-3"} +illparms(f_saem_3) |> kable() +``` + +```{r dependson = "f-saem-3"} +anova(f_saem_3) |> kable(digits = 1) +``` + +While the AIC and BIC values of the best fit (DFOP pathway fit with +two-component error) are lower than in the previous fits with the alternative +pathway, the practical value of these refined evaluations is limited +as no confidence intervals are obtained. + +\clearpage + +# Conclusion + +It was demonstrated that a relatively complex transformation pathway with +parallel formation of two primary metabolites and one secondary metabolite +can be fitted even if the data in the individual datasets are quite different +and partly only cover the formation phase. + +The run times of the pathway fits were several hours, limiting the +practical feasibility of iterative refinements based on ill-defined +parameters and of alternative checks of parameter identifiability +based on multistart runs. + +# Acknowledgements + +The helpful comments by Janina Wöltjen of the German Environment Agency +are gratefully acknowledged. + +\clearpage + +# Appendix + +## Plots of fits that were not refined further + +```{r fig.cap = "SFO pathway fit with two-component error", dependson = "f-saem-1", fig.height = 8} +plot(f_saem_1[["sfo_path_1", "tc"]]) +``` + +\clearpage + +```{r fig.cap = "FOMC pathway fit with two-component error", dependson = "f-saem-1", fig.height = 8} +plot(f_saem_1[["fomc_path_1", "tc"]]) +``` + +\clearpage + + +```{r fig.cap = "HS pathway fit with two-component error", dependson = "f-saem-1", fig.height = 8} +plot(f_saem_1[["sforb_path_1", "tc"]]) +``` + +\clearpage + + +## Hierarchical fit listings + +### Pathway 1 + +```{r listings-1, results = "asis", echo = FALSE, cache = FALSE} +errmods <- c(const = "constant variance", tc = "two-component error") +degmods <- c( + sfo_path_1 = "SFO path 1", + fomc_path_1 = "FOMC path 1", + dfop_path_1 = "DFOP path 1", + sforb_path_1 = "SFORB path 1", + hs_path_1 = "HS path 1") +for (deg_mod in rownames(f_saem_1)) { + for (err_mod in c("const", "tc")) { + fit <- f_saem_1[[deg_mod, err_mod]] + if (!inherits(fit$so, "try-error")) { + caption <- paste("Hierarchical", degmods[deg_mod], "fit with", errmods[err_mod]) + summary_listing(fit, caption) + } + } +} +``` + +### Pathway 2 + +```{r listings-2, results = "asis", echo = FALSE, cache = FALSE} +degmods <- c( + fomc_path_2 = "FOMC path 2", + dfop_path_2 = "DFOP path 2", + sforb_path_2 = "SFORB path 2") +for (deg_mod in rownames(f_saem_2)) { + for (err_mod in c("const", "tc")) { + fit <- f_saem_2[[deg_mod, err_mod]] + if (!inherits(fit$so, "try-error")) { + caption <- paste("Hierarchical", degmods[deg_mod], "fit with", errmods[err_mod]) + summary_listing(fit, caption) + } + } +} +``` + +### Pathway 2, refined fits + +```{r listings-3, results = "asis", echo = FALSE, cache = FALSE} +degmods <- c( + fomc_path_2 = "FOMC path 2", + dfop_path_2 = "DFOP path 2", + sforb_path_2 = "SFORB path 2") +for (deg_mod in rownames(f_saem_3)) { + for (err_mod in c("const", "tc")) { + fit <- f_saem_3[[deg_mod, err_mod]] + if (!inherits(fit$so, "try-error")) { + caption <- paste("Hierarchical", degmods[deg_mod], "fit with reduced random effects,", errmods[err_mod]) + summary_listing(fit, caption) + } + } +} +``` + +## Session info + +```{r, echo = FALSE, cache = FALSE} +parallel::stopCluster(cl = cl) +sessionInfo() +``` + +## Hardware info + +```{r, echo = FALSE} +if(!inherits(try(cpuinfo <- readLines("/proc/cpuinfo")), "try-error")) { + cat(gsub("model name\t: ", "CPU model: ", cpuinfo[grep("model name", cpuinfo)[1]])) +} +if(!inherits(try(meminfo <- readLines("/proc/meminfo")), "try-error")) { + cat(gsub("model name\t: ", "System memory: ", meminfo[grep("MemTotal", meminfo)[1]])) +} +``` diff --git a/vignettes/prebuilt/2022_dmta_parent.rmd b/vignettes/prebuilt/2022_dmta_parent.rmd new file mode 100644 index 00000000..cc2a9124 --- /dev/null +++ b/vignettes/prebuilt/2022_dmta_parent.rmd @@ -0,0 +1,406 @@ +--- +title: "Testing hierarchical parent degradation kinetics with residue data on dimethenamid and dimethenamid-P" +author: Johannes Ranke +date: Last change on 5 January 2023, last compiled on `r format(Sys.time(), "%e %B %Y")` +geometry: margin=2cm +toc: true +bibliography: references.bib +output: + pdf_document: + extra_dependencies: ["float", "listing"] +--- + +```{r setup, echo = FALSE, cache = FALSE} +options(width = 80) # For summary listings +knitr::opts_chunk$set( + comment = "", tidy = FALSE, cache = TRUE, fig.pos = "H", fig.align = "center" +) +``` + +\clearpage + +# Introduction + +The purpose of this document is to demonstrate how nonlinear hierarchical +models (NLHM) based on the parent degradation models SFO, FOMC, DFOP and HS +can be fitted with the mkin package. + +It was assembled in the course of work package 1.1 of Project Number 173340 +(Application of nonlinear hierarchical models to the kinetic evaluation of +chemical degradation data) of the German Environment Agency carried out in 2022 +and 2023. + +The mkin package is used in version `r packageVersion("mkin")`. It contains the +test data and the functions used in the evaluations. The `saemix` package is +used as a backend for fitting the NLHM, but is also loaded to make the +convergence plot function available. + +This document is processed with the `knitr` package, which also provides the +`kable` function that is used to improve the display of tabular data in R +markdown documents. For parallel processing, the `parallel` package is used. + +```{r packages, cache = FALSE, message = FALSE} +library(mkin) +library(knitr) +library(saemix) +library(parallel) +n_cores <- detectCores() +if (Sys.info()["sysname"] == "Windows") { + cl <- makePSOCKcluster(n_cores) +} else { + cl <- makeForkCluster(n_cores) +} +``` + +\clearpage + +# Data + +The test data are available in the mkin package as an object of class +`mkindsg` (mkin dataset group) under the identifier `dimethenamid_2018`. The +following preprocessing steps are still necessary: + +- The data available for the enantiomer dimethenamid-P (DMTAP) are renamed + to have the same substance name as the data for the racemic mixture + dimethenamid (DMTA). The reason for this is that no difference between their + degradation behaviour was identified in the EU risk assessment. +- The data for transformation products and unnecessary columns are discarded +- The observation times of each dataset are multiplied with the + corresponding normalisation factor also available in the dataset, in order to + make it possible to describe all datasets with a single set of parameters + that are independent of temperature +- Finally, datasets observed in the same soil (`Elliot 1` and `Elliot 2`) are + combined, resulting in dimethenamid (DMTA) data from six soils. + +The following commented R code performs this preprocessing. + +```{r data} +# Apply a function to each of the seven datasets in the mkindsg object to create a list +dmta_ds <- lapply(1:7, function(i) { + ds_i <- dimethenamid_2018$ds[[i]]$data # Get a dataset + ds_i[ds_i$name == "DMTAP", "name"] <- "DMTA" # Rename DMTAP to DMTA + ds_i <- subset(ds_i, name == "DMTA", c("name", "time", "value")) # Select data + ds_i$time <- ds_i$time * dimethenamid_2018$f_time_norm[i] # Normalise time + ds_i # Return the dataset +}) + +# Use dataset titles as names for the list elements +names(dmta_ds) <- sapply(dimethenamid_2018$ds, function(ds) ds$title) + +# Combine data for Elliot soil to obtain a named list with six elements +dmta_ds[["Elliot"]] <- rbind(dmta_ds[["Elliot 1"]], dmta_ds[["Elliot 2"]]) # +dmta_ds[["Elliot 1"]] <- NULL +dmta_ds[["Elliot 2"]] <- NULL +``` + +\clearpage + +The following tables show the `r length(dmta_ds)` datasets. + +```{r results = "asis"} + +for (ds_name in names(dmta_ds)) { + print(kable(mkin_long_to_wide(dmta_ds[[ds_name]]), + caption = paste("Dataset", ds_name), + label = paste0("tab:", ds_name), booktabs = TRUE)) + cat("\n\\clearpage\n") +} +``` + +# Separate evaluations + +In order to obtain suitable starting parameters for the NLHM fits, separate +fits of the four models to the data for each soil are generated using the `mmkin` +function from the `mkin` package. In a first step, constant variance is assumed. +Convergence is checked with the `status` function. + +```{r f-sep-const, dependson = "data"} +deg_mods <- c("SFO", "FOMC", "DFOP", "HS") +f_sep_const <- mmkin( + deg_mods, + dmta_ds, + error_model = "const", + quiet = TRUE) + +status(f_sep_const) |> kable() +``` +In the table above, OK indicates convergence, and C indicates failure to +converge. All separate fits with constant variance converged, with the sole +exception of the HS fit to the BBA 2.2 data. To prepare for fitting NLHM using +the two-component error model, the separate fits are updated assuming +two-component error. + +```{r f-sep-tc, dependson = "f-sep-const"} +f_sep_tc <- update(f_sep_const, error_model = "tc") +status(f_sep_tc) |> kable() +``` + +Using the two-component error model, the one fit that did not converge with +constant variance did converge, but other non-SFO fits failed to converge. + +\clearpage + +# Hierarchichal model fits + +The following code fits eight versions of hierarchical models to the data, +using SFO, FOMC, DFOP and HS for the parent compound, and using either constant +variance or two-component error for the error model. The default parameter +distribution model in mkin allows for variation of all degradation parameters +across the assumed population of soils. In other words, each degradation +parameter is associated with a random effect as a first step. The `mhmkin` +function makes it possible to fit all eight versions in parallel (given a +sufficient number of computing cores being available) to save execution time. + +Convergence plots and summaries for these fits are shown in the appendix. + +```{r f-saem, dependson = c("f-sep-const", "f-sep-tc")} +f_saem <- mhmkin(list(f_sep_const, f_sep_tc), transformations = "saemix") +``` +The output of the `status` function shows that all fits terminated +successfully. + +```{r dependson = "f-saem"} +status(f_saem) |> kable() +``` +The AIC and BIC values show that the biphasic models DFOP and HS give the best +fits. + +```{r dependson = "f-saem"} +anova(f_saem) |> kable(digits = 1) +``` + +The DFOP model is preferred here, as it has a better mechanistic basis for +batch experiments with constant incubation conditions. Also, it shows the +lowest AIC and BIC values in the first set of fits when combined with the +two-component error model. Therefore, the DFOP model was selected for further +refinements of the fits with the aim to make the model fully identifiable. + +## Parameter identifiability based on the Fisher Information Matrix + +Using the `illparms` function, ill-defined statistical model parameters such +as standard deviations of the degradation parameters in the population and +error model parameters can be found. + +```{r dependson = "f-saem"} +illparms(f_saem) |> kable() +``` + +According to the `illparms` function, the fitted standard deviation of the +second kinetic rate constant `k2` is ill-defined in both DFOP fits. This +suggests that different values would be obtained for this standard deviation +when using different starting values. + +The thus identified overparameterisation is addressed by removing the random +effect for `k2` from the parameter model. + +```{r f-saem-dfop-tc-no-ranef-k2, dependson = "f-saem"} +f_saem_dfop_tc_no_ranef_k2 <- update(f_saem[["DFOP", "tc"]], + no_random_effect = "k2") +``` + +For the resulting fit, it is checked whether there are still ill-defined +parameters, + +```{r f-saem-dfop-tc-no-ranef-k2-illparms, dependson = "f-saem-dfop-tc-no-ranef-k2"} +illparms(f_saem_dfop_tc_no_ranef_k2) +``` +which is not the case. Below, the refined model is compared with the previous +best model. The model without random effect for `k2` is a reduced version of +the previous model. Therefore, the models are nested and can be compared using +the likelihood ratio test. This is achieved with the argument `test = TRUE` +to the `anova` function. + +```{r f-saem-dfop-tc-no-ranef-k2-comparison, dependson = "f-saem-dfop-tc-no-ranef-k2"} +anova(f_saem[["DFOP", "tc"]], f_saem_dfop_tc_no_ranef_k2, test = TRUE) |> + kable(format.args = list(digits = 4)) +``` + +The AIC and BIC criteria are lower after removal of the ill-defined random +effect for `k2`. The p value of the likelihood ratio test is much greater +than 0.05, indicating that the model with the higher likelihood (here +the model with random effects for all degradation parameters +`f_saem[["DFOP", "tc"]]`) does not fit significantly better than the model +with the lower likelihood (the reduced model `f_saem_dfop_tc_no_ranef_k2`). + +Therefore, AIC, BIC and likelihood ratio test suggest the use of the reduced model. + +The convergence of the fit is checked visually. + +```{r convergence-saem-dfop-tc-no-ranef-k2, fig.cap = "Convergence plot for the NLHM DFOP fit with two-component error and without a random effect on 'k2'", dependson = "f-saem-dfop-tc-no-ranef-k2", echo = FALSE, fig.width = 9, fig.height = 8} +plot(f_saem_dfop_tc_no_ranef_k2$so, plot.type = "convergence") +``` + +All parameters appear to have converged to a satisfactory degree. The final fit +is plotted using the plot method from the mkin package. + +```{r plot-saem-dfop-tc-no-ranef-k2, fig.cap = "Plot of the final NLHM DFOP fit", dependson = "f-saem-dfop-tc-no-ranef-k2", fig.width = 9, fig.height = 5} +plot(f_saem_dfop_tc_no_ranef_k2) +``` +Finally, a summary report of the fit is produced. + +```{r summary-saem-dfop-tc-no-ranef-k2, dependson = "f-saem-dfop-tc-no-ranef-k2"} +summary(f_saem_dfop_tc_no_ranef_k2) +``` + +\clearpage + +## Alternative check of parameter identifiability + +The parameter check used in the `illparms` function is based on a quadratic +approximation of the likelihood surface near its optimum, which is calculated +using the Fisher Information Matrix (FIM). An alternative way to check +parameter identifiability [@duchesne_2021] based on a multistart approach +has recently been implemented in mkin. + +The graph below shows boxplots of the parameters obtained in 50 runs of the +saem algorithm with different parameter combinations, sampled from the range of +the parameters obtained for the individual datasets fitted separately using +nonlinear regression. + +```{r multistart-full, dependson = "f-saem"} +f_saem_dfop_tc_multi <- multistart(f_saem[["DFOP", "tc"]], n = 50, cores = 15) +``` + +```{r multistart-full-par, dependson = "multistart_full", fig.cap = "Scaled parameters from the multistart runs, full model", fig.width = 10, fig.height = 6} +par(mar = c(6.1, 4.1, 2.1, 2.1)) +parplot(f_saem_dfop_tc_multi, lpos = "bottomright", ylim = c(0.3, 10), las = 2) +``` + +The graph clearly confirms the lack of identifiability of the variance of `k2` in +the full model. The overparameterisation of the model also indicates a lack of +identifiability of the variance of parameter `g`. + +The parameter boxplots of the multistart runs with the reduced model shown +below indicate that all runs give similar results, regardless of the starting +parameters. + +```{r multistart-reduced, dependson = "f-saem-dfop-tc-no-ranef-k2"} +f_saem_dfop_tc_no_ranef_k2_multi <- multistart(f_saem_dfop_tc_no_ranef_k2, + n = 50, cores = 15) +``` + +```{r multistart-reduced-par, dependson = "multistart_reduced", fig.cap = "Scaled parameters from the multistart runs, reduced model", fig.width = 10, fig.height = 5} +par(mar = c(6.1, 4.1, 2.1, 2.1)) +parplot(f_saem_dfop_tc_no_ranef_k2_multi, ylim = c(0.5, 2), las = 2, + lpos = "bottomright") +``` + +When only the parameters of the top 25% of the fits are shown (based on a feature +introduced in mkin 1.2.2 currently under development), the scatter is even less +as shown below. + +```{r multistart-reduced-par-llquant, dependson = "multistart_reduced", fig.cap = "Scaled parameters from the multistart runs, reduced model, fits with the top 25\\% likelihood values", fig.width = 10, fig.height = 5} +par(mar = c(6.1, 4.1, 2.1, 2.1)) +parplot(f_saem_dfop_tc_no_ranef_k2_multi, ylim = c(0.5, 2), las = 2, llquant = 0.25, + lpos = "bottomright") +``` + + +\clearpage + +# Conclusions + +Fitting the four parent degradation models SFO, FOMC, DFOP and HS as part of +hierarchical model fits with two different error models and normal +distributions of the transformed degradation parameters works without technical +problems. The biphasic models DFOP and HS gave the best fit to the data, but +the default parameter distribution model was not fully identifiable. Removing +the random effect for the second kinetic rate constant of the DFOP model +resulted in a reduced model that was fully identifiable and showed the lowest +values for the model selection criteria AIC and BIC. The reliability of the +identification of all model parameters was confirmed using multiple starting +values. + +# Acknowledgements + +The helpful comments by Janina Wöltjen of the German Environment Agency +are gratefully acknowledged. + +# References + +\vspace{1em} + +::: {#refs} +::: + +\clearpage + +# Appendix + +## Hierarchical model fit listings + +```{r listings, results = "asis", echo = FALSE} +for (deg_mod in deg_mods) { + for (err_mod in c("const", "tc")) { + caption <- paste("Hierarchical mkin fit of the", deg_mod, "model with error model", err_mod) + summary_listing(f_saem[[deg_mod, err_mod]], caption) + } +} +``` + +## Hierarchical model convergence plots + +```{r convergence-saem-sfo-const, fig.cap = "Convergence plot for the NLHM SFO fit with constant variance", dependson = "f-saem", echo = FALSE, fig.width = 9, fig.height = 5} +plot(f_saem[["SFO", "const"]]$so, plot.type = "convergence") +``` + +\clearpage + +```{r convergence-saem-sfo-tc, fig.cap = "Convergence plot for the NLHM SFO fit with two-component error", dependson = "f-saem", echo = FALSE, fig.width = 9, fig.height = 5} +plot(f_saem[["SFO", "tc"]]$so, plot.type = "convergence") +``` + +\clearpage + +```{r convergence-saem-fomc-const, fig.cap = "Convergence plot for the NLHM FOMC fit with constant variance", dependson = "f-saem", echo = FALSE, fig.width = 9, fig.height = 8} +plot(f_saem[["FOMC", "const"]]$so, plot.type = "convergence") +``` + +\clearpage + +```{r convergence-saem-fomc-tc, fig.cap = "Convergence plot for the NLHM FOMC fit with two-component error", dependson = "f-saem", echo = FALSE, fig.width = 9, fig.height = 8} +plot(f_saem[["FOMC", "tc"]]$so, plot.type = "convergence") +``` + +\clearpage + +```{r convergence-saem-dfop-const, fig.cap = "Convergence plot for the NLHM DFOP fit with constant variance", dependson = "f-saem", echo = FALSE, fig.width = 9, fig.height = 8} +plot(f_saem[["DFOP", "const"]]$so, plot.type = "convergence") +``` + +\clearpage + +```{r convergence-saem-dfop-tc, fig.cap = "Convergence plot for the NLHM DFOP fit with two-component error", dependson = "f-saem", echo = FALSE, fig.width = 9, fig.height = 8} +plot(f_saem[["DFOP", "tc"]]$so, plot.type = "convergence") +``` +\clearpage + +```{r convergence-saem-hs-const, fig.cap = "Convergence plot for the NLHM HS fit with constant variance", dependson = "f-saem", echo = FALSE, fig.width = 9, fig.height = 8} +plot(f_saem[["HS", "const"]]$so, plot.type = "convergence") +``` + +\clearpage + +```{r convergence-saem-hs-tc, fig.cap = "Convergence plot for the NLHM HS fit with two-component error", dependson = "f-saem", echo = FALSE, fig.width = 9, fig.height = 8} +plot(f_saem[["HS", "tc"]]$so, plot.type = "convergence") +``` + +\clearpage + +## Session info + +```{r, echo = FALSE} +parallel::stopCluster(cl) +sessionInfo() +``` + +## Hardware info + +```{r, echo = FALSE} +if(!inherits(try(cpuinfo <- readLines("/proc/cpuinfo")), "try-error")) { + cat(gsub("model name\t: ", "CPU model: ", cpuinfo[grep("model name", cpuinfo)[1]])) +} +if(!inherits(try(meminfo <- readLines("/proc/meminfo")), "try-error")) { + cat(gsub("model name\t: ", "System memory: ", meminfo[grep("MemTotal", meminfo)[1]])) +} +``` diff --git a/vignettes/prebuilt/2022_dmta_pathway.rmd b/vignettes/prebuilt/2022_dmta_pathway.rmd new file mode 100644 index 00000000..ff2b527c --- /dev/null +++ b/vignettes/prebuilt/2022_dmta_pathway.rmd @@ -0,0 +1,426 @@ +--- +title: "Testing hierarchical pathway kinetics with residue data on dimethenamid and dimethenamid-P" +author: Johannes Ranke +date: Last change on 8 January 2023, last compiled on `r format(Sys.time(), "%e %B %Y")` +geometry: margin=2cm +bibliography: references.bib +toc: true +output: + pdf_document: + extra_dependencies: ["float", "listing"] +--- + +```{r setup, echo = FALSE, cache = FALSE} +options(width = 80) # For summary listings +knitr::opts_chunk$set( + comment = "", tidy = FALSE, cache = TRUE, fig.pos = "H", fig.align = "center" +) +``` + +\clearpage + +# Introduction + +The purpose of this document is to test demonstrate how nonlinear hierarchical +models (NLHM) based on the parent degradation models SFO, FOMC, DFOP and HS, +with parallel formation of two or more metabolites can be fitted with the mkin package. + +It was assembled in the course of work package 1.2 of Project Number 173340 +(Application of nonlinear hierarchical models to the kinetic evaluation of +chemical degradation data) of the German Environment Agency carried out in 2022 +and 2023. + +The mkin package is used in version `r packageVersion("mkin")`, which is currently +under development. It contains the test data, and the functions used in the +evaluations. The `saemix` package is used as a backend for fitting the NLHM, +but is also loaded to make the convergence plot function available. + +This document is processed with the `knitr` package, which also provides the +`kable` function that is used to improve the display of tabular data in R +markdown documents. For parallel processing, the `parallel` package is used. + +```{r, packages, cache = FALSE, message = FALSE} +library(mkin) +library(knitr) +library(saemix) +library(parallel) +n_cores <- detectCores() +if (Sys.info()["sysname"] == "Windows") { + cl <- makePSOCKcluster(n_cores) +} else { + cl <- makeForkCluster(n_cores) +} +``` + +\clearpage + +# Data + +The test data are available in the mkin package as an object of class `mkindsg` +(mkin dataset group) under the identifier `dimethenamid_2018`. The following +preprocessing steps are done in this document. + +- The data available for the enantiomer dimethenamid-P (DMTAP) are renamed + to have the same substance name as the data for the racemic mixture + dimethenamid (DMTA). The reason for this is that no difference between their + degradation behaviour was identified in the EU risk assessment. +- Unnecessary columns are discarded +- The observation times of each dataset are multiplied with the + corresponding normalisation factor also available in the dataset, in order to + make it possible to describe all datasets with a single set of parameters + that are independent of temperature +- Finally, datasets observed in the same soil (`Elliot 1` and `Elliot 2`) are + combined, resulting in dimethenamid (DMTA) data from six soils. + +The following commented R code performs this preprocessing. + +```{r, data} +# Apply a function to each of the seven datasets in the mkindsg object to create a list +dmta_ds <- lapply(1:7, function(i) { + ds_i <- dimethenamid_2018$ds[[i]]$data # Get a dataset + ds_i[ds_i$name == "DMTAP", "name"] <- "DMTA" # Rename DMTAP to DMTA + ds_i <- subset(ds_i, select = c("name", "time", "value")) # Select data + ds_i$time <- ds_i$time * dimethenamid_2018$f_time_norm[i] # Normalise time + ds_i # Return the dataset +}) + +# Use dataset titles as names for the list elements +names(dmta_ds) <- sapply(dimethenamid_2018$ds, function(ds) ds$title) + +# Combine data for Elliot soil to obtain a named list with six elements +dmta_ds[["Elliot"]] <- rbind(dmta_ds[["Elliot 1"]], dmta_ds[["Elliot 2"]]) # +dmta_ds[["Elliot 1"]] <- NULL +dmta_ds[["Elliot 2"]] <- NULL +``` + +\clearpage + +The following tables show the `r length(dmta_ds)` datasets. + +```{r show-data, dependson = "data", results = "asis"} +for (ds_name in names(dmta_ds)) { + print( + kable(mkin_long_to_wide(dmta_ds[[ds_name]]), + caption = paste("Dataset", ds_name), + booktabs = TRUE, row.names = FALSE)) + cat("\n\\clearpage\n") +} +``` + +# Separate evaluations + +As a first step to obtain suitable starting parameters for the NLHM fits, we do +separate fits of several variants of the pathway model used previously +[@ranke2021], varying the kinetic model for the parent compound. Because the +SFORB model often provides faster convergence than the DFOP model, and can +sometimes be fitted where the DFOP model results in errors, it is included in +the set of parent models tested here. + +```{r, sep-1-const, dependson = "data"} +if (!dir.exists("dmta_dlls")) dir.create("dmta_dlls") +m_sfo_path_1 <- mkinmod( + DMTA = mkinsub("SFO", c("M23", "M27", "M31")), + M23 = mkinsub("SFO"), + M27 = mkinsub("SFO"), + M31 = mkinsub("SFO", "M27", sink = FALSE), + name = "m_sfo_path", dll_dir = "dmta_dlls", + unload = TRUE, overwrite = TRUE, + quiet = TRUE +) +m_fomc_path_1 <- mkinmod( + DMTA = mkinsub("FOMC", c("M23", "M27", "M31")), + M23 = mkinsub("SFO"), + M27 = mkinsub("SFO"), + M31 = mkinsub("SFO", "M27", sink = FALSE), + name = "m_fomc_path", dll_dir = "dmta_dlls", + unload = TRUE, overwrite = TRUE, + quiet = TRUE +) +m_dfop_path_1 <- mkinmod( + DMTA = mkinsub("DFOP", c("M23", "M27", "M31")), + M23 = mkinsub("SFO"), + M27 = mkinsub("SFO"), + M31 = mkinsub("SFO", "M27", sink = FALSE), + name = "m_dfop_path", dll_dir = "dmta_dlls", + unload = TRUE, overwrite = TRUE, + quiet = TRUE +) +m_sforb_path_1 <- mkinmod( + DMTA = mkinsub("SFORB", c("M23", "M27", "M31")), + M23 = mkinsub("SFO"), + M27 = mkinsub("SFO"), + M31 = mkinsub("SFO", "M27", sink = FALSE), + name = "m_sforb_path", dll_dir = "dmta_dlls", + unload = TRUE, overwrite = TRUE, + quiet = TRUE +) +m_hs_path_1 <- mkinmod( + DMTA = mkinsub("HS", c("M23", "M27", "M31")), + M23 = mkinsub("SFO"), + M27 = mkinsub("SFO"), + M31 = mkinsub("SFO", "M27", sink = FALSE), + name = "m_hs_path", dll_dir = "dmta_dlls", + unload = TRUE, overwrite = TRUE, + quiet = TRUE +) +deg_mods_1 <- list( + sfo_path_1 = m_sfo_path_1, + fomc_path_1 = m_fomc_path_1, + dfop_path_1 = m_dfop_path_1, + sforb_path_1 = m_sforb_path_1, + hs_path_1 = m_hs_path_1) + +sep_1_const <- mmkin( + deg_mods_1, + dmta_ds, + error_model = "const", + quiet = TRUE) + +status(sep_1_const) |> kable() +``` + +All separate pathway fits with SFO or FOMC for the parent and constant variance +converged (status OK). Most fits with DFOP or SFORB for the parent converged +as well. The fits with HS for the parent did not converge with default settings. + +```{r, sep-1-tc, dependson = "sep-1-const"} +sep_1_tc <- update(sep_1_const, error_model = "tc") +status(sep_1_tc) |> kable() +``` + +With the two-component error model, the set of fits with convergence problems +is slightly different, with convergence problems appearing for different data +sets when applying the DFOP and SFORB model and some additional convergence +problems when using the FOMC model for the parent. + +\clearpage + +# Hierarchichal model fits + +The following code fits two sets of the corresponding hierarchical models to +the data, one assuming constant variance, and one assuming two-component error. + +```{r saem-1, dependson = c("sep-1-const", "sep-1-tc")} +saem_1 <- mhmkin(list(sep_1_const, sep_1_tc)) +``` +The run time for these fits was around two hours on five year old hardware. After +a recent hardware upgrade these fits complete in less than twenty minutes. + +```{r, saem-1-status, dependson = "saem-1"} +status(saem_1) |> kable() +``` + +According to the `status` function, all fits terminated successfully. + +```{r saem-1-anova, dependson = "saem-1"} +anova(saem_1) |> kable(digits = 1) +``` + +When the goodness-of-fit of the models is compared, a warning is obtained, +indicating that the likelihood of the pathway fit with SFORB for the parent +compound and constant variance could not be calculated with importance sampling +(method 'is'). As this is the default method on which all AIC and BIC +comparisons are based, this variant is not included in the model comparison +table. Comparing the goodness-of-fit of the remaining models, HS model model +with two-component error provides the best fit. However, for batch experiments +performed with constant conditions such as the experiments evaluated here, +there is no reason to assume a discontinuity, so the SFORB model is +preferable from a mechanistic viewpoint. In addition, the information criteria +AIC and BIC are very similar for HS and SFORB. Therefore, the SFORB model is +selected here for further refinements. + +\clearpage + +## Parameter identifiability based on the Fisher Information Matrix + +Using the `illparms` function, ill-defined statistical model parameters such as +standard deviations of the degradation parameters in the population and error +model parameters can be found. + +```{r saem-1-illparms, dependson = "saem-1"} +illparms(saem_1) |> kable() +``` + +When using constant variance, no ill-defined variance parameters are identified +with the `illparms` function in any of the degradation models. When using +the two-component error model, there is one ill-defined variance parameter +in all variants except for the variant using DFOP for the parent compound. + +For the selected combination of the SFORB pathway model with two-component +error, the random effect for the rate constant from reversibly bound DMTA to +the free DMTA (`k_DMTA_bound_free`) is not well-defined. Therefore, the fit is +updated without assuming a random effect for this parameter. + +```{r saem-sforb-path-1-tc-reduced, dependson = "saem-1"} +saem_sforb_path_1_tc_reduced <- update(saem_1[["sforb_path_1", "tc"]], + no_random_effect = "log_k_DMTA_bound_free") +illparms(saem_sforb_path_1_tc_reduced) +``` + +As expected, no ill-defined parameters remain. The model comparison below shows +that the reduced model is preferable. + +```{r saem-sforb-path-1-tc-reduced-anova, dependson = "saem-sforb-path-1-tc-reduced"} +anova(saem_1[["sforb_path_1", "tc"]], saem_sforb_path_1_tc_reduced) |> kable(digits = 1) +``` + +The convergence plot of the refined fit is shown below. + +```{r saem-sforb-path-1-tc-reduced-convergence, dependson = "saem-sforb-path-1-tc-reduced", fig.height = 12} +plot(saem_sforb_path_1_tc_reduced$so, plot.type = "convergence") +``` + +For some parameters, for example for `f_DMTA_ilr_1` and `f_DMTA_ilr_2`, i.e. +for two of the parameters determining the formation fractions of the parallel +formation of the three metabolites, some movement of the parameters is still +visible in the second phase of the algorithm. However, the amplitude of this +movement is in the range of the amplitude towards the end of the first phase. +Therefore, it is likely that an increase in iterations would not improve the +parameter estimates very much, and it is proposed that the fit is acceptable. +No numeric convergence criterion is implemented in saemix. + +\clearpage + +## Alternative check of parameter identifiability + +As an alternative check of parameter identifiability [@duchesne_2021], +multistart runs were performed on the basis of the refined fit shown above. + +```{r saem-sforb-multistart, dependson = "saem-sforb-path-1-tc-reduced"} +saem_sforb_path_1_tc_reduced_multi <- multistart(saem_sforb_path_1_tc_reduced, + n = 32, cores = 10) +``` + +```{r dependson = "saem-sforb-multistart"} +print(saem_sforb_path_1_tc_reduced_multi) +``` + +Out of the 32 fits that were initiated, only 17 terminated without an error. +The reason for this is that the wide variation of starting parameters in combination +with the parameter variation that is used in the SAEM algorithm leads to +parameter combinations for the degradation model that the numerical integration +routine cannot cope with. Because of this variation of initial parameters, +some of the model fits take up to two times more time than the original fit. + +```{r dependson = "saem-sforb-multistart", fig.cap = "Parameter boxplots for the multistart runs that succeeded", fig.height = 6, fig.width = 10} +par(mar = c(12.1, 4.1, 2.1, 2.1)) +parplot(saem_sforb_path_1_tc_reduced_multi, ylim = c(0.5, 2), las = 2) +``` + +However, visual analysis of the boxplot of the parameters obtained in the +successful fits confirms that the results are sufficiently independent of the +starting parameters, and there are no remaining ill-defined parameters. + +\clearpage + + +# Plots of selected fits + +The SFORB pathway fits with full and reduced parameter distribution model are +shown below. + +```{r fig.cap = "SFORB pathway fit with two-component error", dependson = "saem-1", fig.height = 8} +plot(saem_1[["sforb_path_1", "tc"]]) +``` + +\clearpage + +```{r fig.cap = "SFORB pathway fit with two-component error, reduced parameter model", dependson = "saem-sforb-path-1-tc-reduced", fig.height = 8} +plot(saem_sforb_path_1_tc_reduced) +``` + +Plots of the remaining fits and listings for all successful fits are shown in +the Appendix. + + +# Conclusions + +Pathway fits with SFO, FOMC, DFOP, SFORB and HS models for the parent compound +could be successfully performed. + +\clearpage + +# Acknowledgements + +The helpful comments by Janina Wöltjen of the German Environment Agency +on earlier versions of this document are gratefully acknowledged. + +# References + +\vspace{1em} + +::: {#refs} +::: + +\clearpage + +# Appendix + +## Plots of hierarchical fits not selected for refinement + +```{r fig.cap = "SFO pathway fit with two-component error", dependson = "saem-1", fig.height = 8} +plot(saem_1[["sfo_path_1", "tc"]]) +``` + +\clearpage + +```{r fig.cap = "FOMC pathway fit with two-component error", dependson = "saem-1", fig.height = 8} +plot(saem_1[["fomc_path_1", "tc"]]) +``` + +\clearpage + + +```{r fig.cap = "HS pathway fit with two-component error", dependson = "saem-1", fig.height = 8} +plot(saem_1[["sforb_path_1", "tc"]]) +``` + +\clearpage + +## Hierarchical model fit listings + +### Fits with random effects for all degradation parameters + +```{r listings-1, results = "asis", echo = FALSE} +errmods <- c(const = "constant variance", tc = "two-component error") +degmods <- c( + sfo_path_1 = "SFO path 1", + fomc_path_1 = "FOMC path 1", + dfop_path_1 = "DFOP path 1", + sforb_path_1 = "SFORB path 1", + hs_path_1 = "HS path 1") +for (deg_mod in rownames(saem_1)) { + for (err_mod in c("const", "tc")) { + fit <- saem_1[[deg_mod, err_mod]] + if (!inherits(fit$so, "try-error")) { + caption <- paste("Hierarchical", degmods[deg_mod], "fit with", errmods[err_mod]) + tex_listing(fit, caption) + } + } +} +``` + +### Improved fit of the SFORB pathway model with two-component error + +```{r listings-2, results = "asis", echo = FALSE, dependson = "listings-1"} +caption <- paste("Hierarchical SFORB pathway fit with two-component error") +tex_listing(saem_sforb_path_1_tc_reduced, caption) +``` + +## Session info + +```{r, echo = FALSE} +parallel::stopCluster(cl) +sessionInfo() +``` + +## Hardware info + +```{r, echo = FALSE} +if(!inherits(try(cpuinfo <- readLines("/proc/cpuinfo")), "try-error")) { + cat(gsub("model name\t: ", "CPU model: ", cpuinfo[grep("model name", cpuinfo)[1]])) +} +if(!inherits(try(meminfo <- readLines("/proc/meminfo")), "try-error")) { + cat(gsub("model name\t: ", "System memory: ", meminfo[grep("MemTotal", meminfo)[1]])) +} +``` diff --git a/vignettes/prebuilt/references.bib b/vignettes/prebuilt/references.bib new file mode 100644 index 00000000..08c51747 --- /dev/null +++ b/vignettes/prebuilt/references.bib @@ -0,0 +1,187 @@ +@BOOK{bates1988, + title = {Nonlinear regression and its applications}, + publisher = {Wiley-Interscience}, + year = {1988}, + author = {D. Bates and D. Watts} +} + +@MANUAL{FOCUSkinetics2011, + title = {Generic guidance for estimating persistence and degradation kinetics + from environmental fate studies on pesticides in EU registration}, + author = {{FOCUS Work Group on Degradation Kinetics}}, + edition = {1.0}, + month = {November}, + year = {2011}, + file = {FOCUS kinetics 2011 Generic guidance:/home/ranke/dok/orgs/focus/FOCUSkineticsvc_1_0_Nov23.pdf:PDF}, + url = {http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics} +} + +@MANUAL{FOCUSkinetics2014, + title = {Generic guidance for estimating persistence and degradation kinetics + from environmental fate studies on pesticides in EU registration}, + author = {{FOCUS Work Group on Degradation Kinetics}}, + edition = {1.1}, + month = {December}, + year = {2014}, + file = {FOCUS kinetics 2011 Generic guidance:/home/ranke/dok/orgs/focus/dk/FOCUSkineticsvc1.1Dec2014.pdf:PDF}, + url = {http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics} +} + +@MANUAL{FOCUS2006, + title = {Guidance Document on Estimating Persistence and Degradation Kinetics + from Environmental Fate Studies on Pesticides in EU Registration. + Report of the FOCUS Work Group on Degradation Kinetics}, + author = {{FOCUS Work Group on Degradation Kinetics}}, + year = {2006}, + note = {EC Document Reference Sanco/10058/2005 version 2.0}, + url = {http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics} +} + +@MANUAL{rcore2016, + title = {\textsf{R}: A Language and Environment for Statistical Computing}, + author = {{R Development Core Team}}, + organization = {R Foundation for Statistical Computing}, + address = {Vienna, Austria}, + year = {2016}, + note = {{ISBN} 3-900051-07-0}, + url = {https://www.R-project.org} +} + +@MANUAL{pkg:mkin, + title = {`{mkin}`: {K}inetic evaluation of chemical degradation data}, + author = {J. Ranke}, + year = {2021}, + url = {https://CRAN.R-project.org/package=mkin} +} + +@Inproceedings{ schaefer2007, + title = {{KinGUI}: a new kinetic software tool for evaluations according to {FOCUS} degradation kinetics}, + author = {D. Sch\"{a}fer and B. Mikolasch and P. Rainbird and B. Harvey}, + booktitle = {Proceedings of the XIII Symposium Pesticide Chemistry}, + editor = {Del Re A. A. M. and Capri E. and Fragoulis G. and Trevisan M.}, + year = {2007}, + address = {Piacenza}, + pages = {916--923} +} + +@ARTICLE{soetaert2010, + author = {Karline Soetaert and Thomas Petzoldt}, + title = {Inverse Modelling, Sensitivity and Monte Carlo Analysis in {R} Using + Package {FME}}, + journal = {Journal of Statistical Software}, + year = {2010}, + volume = {33}, + pages = {1--28}, + number = {3}, + doi = {10.18637/jss.v033.i03} +} + +@Inproceedings{ ranke2012, + title = {Parameter reliability in kinetic evaluation of environmental metabolism data - Assessment and the influence of model specification}, + author = {J. Ranke and R. Lehmann}, + booktitle = {SETAC World 20-24 May}, + year = {2012}, + address = {Berlin}, + url = {https://jrwb.de/posters/Poster_SETAC_2012_Kinetic_parameter_uncertainty_model_parameterization_Lehmann_Ranke.pdf} +} +@Inproceedings{ ranke2015, + title = {To t-test or not to t-test, that is the question}, + author = {J. Ranke and R. Lehmann}, + booktitle = {XV Symposium on Pesticide Chemistry 2-4 September 2015}, + year = {2015}, + address = {Piacenza}, + url = {https://jrwb.de/posters/piacenza_2015.pdf} +} +@Techreport{ranke2014, + title = {{Prüfung und Validierung von Modellierungssoftware als Alternative zu + ModelMaker 4.0}}, + author = {J. Ranke}, + year = 2014, + institution = {Umweltbundesamt}, + volume = {Projektnummer 27452} +} + +@Article{ranke2018, + author="Ranke, Johannes + and W{\"o}ltjen, Janina + and Meinecke, Stefan", + title="Comparison of software tools for kinetic evaluation of chemical degradation data", + journal="Environmental Sciences Europe", + year="2018", + month="May", + day="18", + volume="30", + number="1", + pages="17", + abstract="For evaluating the fate of xenobiotics in the environment, a variety of degradation or environmental metabolism experiments are routinely conducted. The data generated in such experiments are evaluated by optimizing the parameters of kinetic models in a way that the model simulation fits the data. No comparison of the main software tools currently in use has been published to date. This article shows a comparison of numerical results as well as an overall, somewhat subjective comparison based on a scoring system using a set of criteria. The scoring was separately performed for two types of uses. Uses of type I are routine evaluations involving standard kinetic models and up to three metabolites in a single compartment. Evaluations involving non-standard model components, more than three metabolites or more than a single compartment belong to use type II. For use type I, usability is most important, while the flexibility of the model definition is most important for use type II.", + issn="2190-4715", + doi="10.1186/s12302-018-0145-1", + url="https://doi.org/10.1186/s12302-018-0145-1" +} + +@Article{ranke2019, + author = {Ranke, Johannes and Meinecke, Stefan}, + title = {Error Models for the Kinetic Evaluation of Chemical Degradation Data}, + journal = {Environments}, + year = {2019}, + volume = {6}, + number = {12}, + issn = {2076-3298}, + abstract = {In the kinetic evaluation of chemical degradation data, degradation models are fitted to the data by varying degradation model parameters to obtain the best possible fit. Today, constant variance of the deviations of the observed data from the model is frequently assumed (error model “constant variance”). Allowing for a different variance for each observed variable (“variance by variable”) has been shown to be a useful refinement. On the other hand, experience gained in analytical chemistry shows that the absolute magnitude of the analytical error often increases with the magnitude of the observed value, which can be explained by an error component which is proportional to the true value. Therefore, kinetic evaluations of chemical degradation data using a two-component error model with a constant component (absolute error) and a component increasing with the observed values (relative error) are newly proposed here as a third possibility. In order to check which of the three error models is most adequate, they have been used in the evaluation of datasets obtained from pesticide evaluation dossiers published by the European Food Safety Authority (EFSA). For quantitative comparisons of the fits, the Akaike information criterion (AIC) was used, as the commonly used error level defined by the FOrum for the Coordination of pesticide fate models and their USe(FOCUS) is based on the assumption of constant variance. A set of fitting routines was developed within the mkin software package that allow for robust fitting of all three error models. Comparisons using parent only degradation datasets, as well as datasets with the formation and decline of transformation products showed that in many cases, the two-component error model proposed here provides the most adequate description of the error structure. While it was confirmed that the variance by variable error model often provides an improved representation of the error structure in kinetic fits with metabolites, it could be shown that in many cases, the two-component error model leads to a further improvement. In addition, it can be applied to parent only fits, potentially improving the accuracy of the fit towards the end of the decline curve, where concentration levels are lower.}, + article-number = {124}, + doi = {10.3390/environments6120124}, + url = {https://www.mdpi.com/2076-3298/6/12/124}, +} + + +@Article{ranke2021, + AUTHOR = {Ranke, Johannes and Wöltjen, Janina and Schmidt, Jana and Comets, Emmanuelle}, + TITLE = {Taking Kinetic Evaluations of Degradation Data to the Next Level with Nonlinear Mixed-Effects Models}, + JOURNAL = {Environments}, + VOLUME = {8}, + YEAR = {2021}, + NUMBER = {8}, + ARTICLE-NUMBER = {71}, + URL = {https://www.mdpi.com/2076-3298/8/8/71}, + ISSN = {2076-3298}, + ABSTRACT = {When data on the degradation of a chemical substance have been collected in a number of environmental media (e.g., in different soils), two strategies can be followed for data evaluation. Currently, each individual dataset is evaluated separately, and representative degradation parameters are obtained by calculating averages of the kinetic parameters. However, such averages often take on unrealistic values if certain degradation parameters are ill-defined in some of the datasets. Moreover, the most appropriate degradation model is selected for each individual dataset, which is time consuming and then requires workarounds for averaging parameters from different models. Therefore, a simultaneous evaluation of all available data is desirable. If the environmental media are viewed as random samples from a population, an advanced strategy based on assumptions about the statistical distribution of the kinetic parameters across the population can be used. Here, we show the advantages of such simultaneous evaluations based on nonlinear mixed-effects models that incorporate such assumptions in the evaluation process. The advantages of this approach are demonstrated using synthetically generated data with known statistical properties and using publicly available experimental degradation data on two pesticidal active substances.}, + DOI = {10.3390/environments8080071} +} + +@Article{gao11, + Title = {Improving uncertainty analysis in kinetic evaluations using iteratively reweighted least squares}, + Author = {Gao, Z. and Green, J.W. and Vanderborght, J. and Schmitt, W.}, + Journal = {Environmental Science and Technology}, + Year = {2011}, + Pages = {4429-4437}, + Volume = {45}, + Type = {Journal} +} + + +@article{efsa_2018_dimethenamid, + author = {EFSA}, + issue = {4}, + journal = {EFSA Journal}, + pages = {5211}, + title = {Peer review of the pesticide risk assessment of the active substance dimethenamid-P}, + volume = {16}, + year = {2018} +} + +@techreport{dimethenamid_rar_2018_b8, + author = {{Rapporteur Member State Germany, Co-Rapporteur Member State Bulgaria}}, + year = {2018}, + title = {{Renewal Assessment Report Dimethenamid-P Volume 3 - B.8 Environmental fate and behaviour, Rev. 2 - November 2017}}, + url = {https://open.efsa.europa.eu/study-inventory/EFSA-Q-2014-00716} +} + +@article{duchesne_2021, + title={Practical identifiability in the frame of nonlinear mixed effects models: the example of the in vitro erythropoiesis}, + author={Ronan Duchesne and Anissa Guillemin and Olivier Gandrillon and Fabien Crauste}, + journal={BMC Bioinformatics}, + year={2021}, + volume={22}, + number = {478}, + url = {https://doi.org/10.1186/s12859-021-04373-4} +} -- cgit v1.2.1