From 24eb77216700cf8b2f2bde3abad84c1f83f9e32a Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Mon, 9 Jan 2023 06:22:04 +0100 Subject: Prebuilt PDF vignettes, summary_listing --- vignettes/prebuilt/2022_cyan_pathway.rmd | 536 +++++++++++++++++++++++++++++++ 1 file changed, 536 insertions(+) create mode 100644 vignettes/prebuilt/2022_cyan_pathway.rmd (limited to 'vignettes/prebuilt/2022_cyan_pathway.rmd') diff --git a/vignettes/prebuilt/2022_cyan_pathway.rmd b/vignettes/prebuilt/2022_cyan_pathway.rmd new file mode 100644 index 00000000..df34a6f2 --- /dev/null +++ b/vignettes/prebuilt/2022_cyan_pathway.rmd @@ -0,0 +1,536 @@ +--- +title: "Testing hierarchical pathway kinetics with residue data on cyantraniliprole" +author: Johannes Ranke +date: Last change on 6 January 2023, last compiled on `r format(Sys.time(), "%e + %B %Y")` +output: + pdf_document: + extra_dependencies: ["float", "listing"] +toc: yes +geometry: margin=2cm +--- + +```{r setup, echo = FALSE, cache = FALSE} +options(width = 80) # For summary listings +knitr::opts_chunk$set( + comment = "", tidy = FALSE, cache = TRUE, fig.pos = "H", fig.align = "center" +) +``` + +\clearpage + +# Introduction + +The purpose of this document is to test demonstrate how nonlinear hierarchical +models (NLHM) based on the parent degradation models SFO, FOMC, DFOP and HS, +with serial formation of two or more metabolites can be fitted with the mkin +package. + +It was assembled in the course of work package 1.2 of Project Number 173340 +(Application of nonlinear hierarchical models to the kinetic evaluation of +chemical degradation data) of the German Environment Agency carried out in 2022 +and 2023. + +The mkin package is used in version `r packageVersion("mkin")` which is +currently under development. The newly introduced functionality that is +used here is a simplification of excluding random effects for a set of fits +based on a related set of fits with a reduced model, and the documentation of +the starting parameters of the fit, so that all starting parameters of `saem` +fits are now listed in the summary. The `saemix` package is used as a backend +for fitting the NLHM, but is also loaded to make the convergence plot function +available. + +This document is processed with the `knitr` package, which also provides the +`kable` function that is used to improve the display of tabular data in R +markdown documents. For parallel processing, the `parallel` package is used. + +```{r, packages, cache = FALSE, message = FALSE} +library(mkin) +library(knitr) +library(saemix) +library(parallel) +n_cores <- detectCores() +if (Sys.info()["sysname"] == "Windows") { + cl <- makePSOCKcluster(n_cores) +} else { + cl <- makeForkCluster(n_cores) +} +``` + +\clearpage + +## Test data + +The example data are taken from the final addendum to the DAR from 2014 +and are distributed with the mkin package. Residue data and time step +normalisation factors are read in using the function `read_spreadsheet` from +the mkin package. This function also performs the time step normalisation. + +```{r data} +data_file <- system.file( + "testdata", "cyantraniliprole_soil_efsa_2014.xlsx", + package = "mkin") +cyan_ds <- read_spreadsheet(data_file, parent_only = FALSE) +``` + +The following tables show the covariate data and the `r length(cyan_ds)` +datasets that were read in from the spreadsheet file. + +```{r show-covar-data, dependson = "data", results = "asis"} +pH <- attr(cyan_ds, "covariates") +kable(pH, caption = "Covariate data") +``` + +\clearpage + +```{r show-data, dependson = "data", results = "asis"} +for (ds_name in names(cyan_ds)) { + print( + kable(mkin_long_to_wide(cyan_ds[[ds_name]]), + caption = paste("Dataset", ds_name), + booktabs = TRUE, row.names = FALSE)) + cat("\n\\clearpage\n") +} +``` + +\clearpage + +# Parent only evaluations + +As the pathway fits have very long run times, evaluations of the parent data +are performed first, in order to determine for each hierarchical parent +degradation model which random effects on the degradation model parameters are +ill-defined. + +```{r parent-only, dependson = "data"} +cyan_sep_const <- mmkin(c("SFO", "FOMC", "DFOP", "SFORB", "HS"), + cyan_ds, quiet = TRUE, cores = n_cores) +cyan_sep_tc <- update(cyan_sep_const, error_model = "tc") +cyan_saem_full <- mhmkin(list(cyan_sep_const, cyan_sep_tc)) +status(cyan_saem_full) |> kable() +``` + +All fits converged successfully. + +```{r dependson = "parent-only"} +illparms(cyan_saem_full) |> kable() +``` +In almost all models, the random effect for the initial concentration of the +parent compound is ill-defined. For the biexponential models DFOP and SFORB, +the random effect of one additional parameter is ill-defined when the two-component +error model is used. + +```{r dependson = "parent-only"} +anova(cyan_saem_full) |> kable(digits = 1) +``` + +Model comparison based on AIC and BIC indicates that the two-component error model +is preferable for all parent models with the exception of DFOP. The lowest AIC +and BIC values are are obtained with the FOMC model, followed by SFORB and DFOP. + +```{r parent-only-reduced, dependson = "parent-only", include = FALSE} +cyan_saem_reduced <- mhmkin(list(cyan_sep_const, cyan_sep_tc), + no_random_effect = illparms(cyan_saem_full)) +illparms(cyan_saem_reduced) +anova(cyan_saem_reduced) |> kable(digits = 1) +``` + +# Pathway fits + +## Evaluations with pathway established previously + +To test the technical feasibility of coupling the relevant parent degradation +models with different transformation pathway models, a list of `mkinmod` models +is set up below. As in the EU evaluation, parallel formation of metabolites +JCZ38 and J9Z38 and secondary formation of metabolite JSE76 from JCZ38 is used. + +```{r, cyan-path-1} +if (!dir.exists("cyan_dlls")) dir.create("cyan_dlls") +cyan_path_1 <- list( + sfo_path_1 = mkinmod( + cyan = mkinsub("SFO", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO"), quiet = TRUE, + name = "sfo_path_1", dll_dir = "cyan_dlls", overwrite = TRUE), + fomc_path_1 = mkinmod( + cyan = mkinsub("FOMC", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO"), quiet = TRUE, + name = "fomc_path_1", dll_dir = "cyan_dlls", overwrite = TRUE), + dfop_path_1 = mkinmod( + cyan = mkinsub("DFOP", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO"), quiet = TRUE, + name = "dfop_path_1", dll_dir = "cyan_dlls", overwrite = TRUE), + sforb_path_1 = mkinmod( + cyan = mkinsub("SFORB", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO"), quiet = TRUE, + name = "sforb_path_1", dll_dir = "cyan_dlls", overwrite = TRUE), + hs_path_1 = mkinmod( + cyan = mkinsub("HS", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO"), quiet = TRUE, + name = "hs_path_1", dll_dir = "cyan_dlls", overwrite = TRUE) +) +``` +To obtain suitable starting values for the NLHM fits, separate pathway fits are +performed for all datasets. + +```{r, f-sep-1, dependson = c("data", "cyan_path_1")} +f_sep_1_const <- mmkin( + cyan_path_1, + cyan_ds, + error_model = "const", + cluster = cl, + quiet = TRUE) +status(f_sep_1_const) |> kable() + +f_sep_1_tc <- update(f_sep_1_const, error_model = "tc") +status(f_sep_1_tc) |> kable() +``` + +Most separate fits converged successfully. The biggest convergence +problems are seen when using the HS model with constant variance. + +For the hierarchical pathway fits, those random effects that could not be +quantified in the corresponding parent data analyses are excluded. + +In the code below, the output of the `illparms` function for the parent only +fits is used as an argument `no_random_effect` to the `mhmkin` function. +The possibility to do so was introduced in mkin version `1.2.2` which is +currently under development. + +```{r, f-saem-1, dependson = "f-sep-1"} +f_saem_1 <- mhmkin(list(f_sep_1_const, f_sep_1_tc), + no_random_effect = illparms(cyan_saem_full), + cluster = cl) +``` + +```{r dependson = "f-saem-1"} +status(f_saem_1) |> kable() +``` + +The status information from the individual fits shows that all fits completed +successfully. The matrix entries Fth and FO indicate that the Fisher +Information Matrix could not be inverted for the fixed effects (theta) +and the random effects (Omega), respectively. For the affected fits, +ill-defined parameters cannot be determined using the `illparms` function, +because it relies on the Fisher Information Matrix. + +```{r dependson = "f-saem-1"} +illparms(f_saem_1) |> kable() +``` + +The model comparison below suggests that the pathway fits using +DFOP or SFORB for the parent compound provide the best fit. + +```{r, dependson = "f-saem-1"} +anova(f_saem_1) |> kable(digits = 1) +``` + +For these two parent model, successful fits are shown below. Plots of the fits +with the other parent models are shown in the Appendix. + +```{r fig.cap = "DFOP pathway fit with two-component error", dependson = "f-saem-1", fig.height = 8} +plot(f_saem_1[["dfop_path_1", "tc"]]) +``` + +\clearpage + +```{r fig.cap = "SFORB pathway fit with two-component error", dependson = "f-saem-1", fig.height = 8} +plot(f_saem_1[["sforb_path_1", "tc"]]) +``` + +A closer graphical analysis of these Figures shows that the residues of +transformation product JCZ38 in the soils Tama and Nambsheim observed +at later time points are strongly and systematically underestimated. + +\clearpage + + +## Alternative pathway fits + +To improve the fit for JCZ38, a back-reaction from JSE76 to JCZ38 was +introduced in an alternative version of the transformation pathway, in analogy +to the back-reaction from K5A78 to K5A77. Both pairs of transformation products +are pairs of an organic acid with its corresponding amide (Addendum 2014, p. +109). As FOMC provided the best fit for the parent, and the biexponential +models DFOP and SFORB provided the best initial pathway fits, these three +parent models are used in the alternative pathway fits. + +```{r, f-sep-2-const, dependson = "data"} +cyan_path_2 <- list( + fomc_path_2 = mkinmod( + cyan = mkinsub("FOMC", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO", "JCZ38"), + name = "fomc_path_2", quiet = TRUE, + dll_dir = "cyan_dlls", + overwrite = TRUE + ), + dfop_path_2 = mkinmod( + cyan = mkinsub("DFOP", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO", "JCZ38"), + name = "dfop_path_2", quiet = TRUE, + dll_dir = "cyan_dlls", + overwrite = TRUE + ), + sforb_path_2 = mkinmod( + cyan = mkinsub("SFORB", c("JCZ38", "J9Z38")), + JCZ38 = mkinsub("SFO", "JSE76"), + J9Z38 = mkinsub("SFO"), + JSE76 = mkinsub("SFO", "JCZ38"), + name = "sforb_path_2", quiet = TRUE, + dll_dir = "cyan_dlls", + overwrite = TRUE + ) +) +f_sep_2_const <- mmkin( + cyan_path_2, + cyan_ds, + error_model = "const", + cluster = cl, + quiet = TRUE) + +status(f_sep_2_const) |> kable() +``` + +Using constant variance, separate fits converge with the exception +of the fits to the Sassafras soil data. + +```{r f-sep-2-tc, dependson = "f-sep-2-const"} +f_sep_2_tc <- update(f_sep_2_const, error_model = "tc") +status(f_sep_2_tc) |> kable() +``` + +Using the two-component error model, all separate fits converge with the +exception of the alternative pathway fit with DFOP used for the parent and the +Sassafras dataset. + +```{r f-saem-2, dependson = c("f-sep-2-const", "f-sep-2-tc")} +f_saem_2 <- mhmkin(list(f_sep_2_const, f_sep_2_tc), + no_random_effect = illparms(cyan_saem_full[2:4, ]), + cluster = cl) +``` + +```{r dependson = "f-saem-2"} +status(f_saem_2) |> kable() +``` + +The hierarchical fits for the alternative pathway completed successfully. + +```{r dependson = "f-saem-2"} +illparms(f_saem_2) |> kable() +``` + +In both fits, the random effects for the formation fractions for the +pathways from JCZ38 to JSE76, and for the reverse pathway from JSE76 +to JCZ38 are ill-defined. + +```{r dependson = "f-saem-2"} +anova(f_saem_2) |> kable(digits = 1) +``` + +The variants using the biexponential models DFOP and SFORB for the parent +compound and the two-component error model give the lowest AIC and BIC values +and are plotted below. Compared with the original pathway, the AIC and BIC +values indicate a large improvement. This is confirmed by the plots, which show +that the metabolite JCZ38 is fitted much better with this model. + +\clearpage + +```{r fig.cap = "FOMC pathway fit with two-component error, alternative pathway", dependson = "f-saem-2", fig.height = 8} +plot(f_saem_2[["fomc_path_2", "tc"]]) +``` +\clearpage + +```{r fig.cap = "DFOP pathway fit with two-component error, alternative pathway", dependson = "f-saem-2", fig.height = 8} +plot(f_saem_2[["dfop_path_2", "tc"]]) +``` + +\clearpage + +```{r fig.cap = "SFORB pathway fit with two-component error, alternative pathway", dependson = "f-saem-2", fig.height = 8} +plot(f_saem_2[["sforb_path_2", "tc"]]) +``` + +\clearpage + +## Refinement of alternative pathway fits + +All ill-defined random effects that were identified in the parent only fits and +in the above pathway fits, are excluded for the final evaluations below. +For this purpose, a list of character vectors is created below that can be indexed +by row and column indices, and which contains the degradation parameter names for which +random effects should be excluded for each of the hierarchical fits contained +in `f_saem_2`. + +```{r f-saem-3, dependson = "f-saem-2"} +no_ranef <- matrix(list(), nrow = 3, ncol = 2, dimnames = dimnames(f_saem_2)) +no_ranef[["fomc_path_2", "const"]] <- c("log_beta", "f_JCZ38_qlogis", "f_JSE76_qlogis") +no_ranef[["fomc_path_2", "tc"]] <- c("cyan_0", "f_JCZ38_qlogis", "f_JSE76_qlogis") +no_ranef[["dfop_path_2", "const"]] <- c("cyan_0", "f_JCZ38_qlogis", "f_JSE76_qlogis") +no_ranef[["dfop_path_2", "tc"]] <- c("cyan_0", "log_k1", "f_JCZ38_qlogis", "f_JSE76_qlogis") +no_ranef[["sforb_path_2", "const"]] <- c("cyan_free_0", + "f_JCZ38_qlogis", "f_JSE76_qlogis") +no_ranef[["sforb_path_2", "tc"]] <- c("cyan_free_0", "log_k_cyan_free_bound", + "f_JCZ38_qlogis", "f_JSE76_qlogis") +clusterExport(cl, "no_ranef") + +f_saem_3 <- update(f_saem_2, + no_random_effect = no_ranef, + cluster = cl) +``` + +```{r dependson = "f-saem-3"} +status(f_saem_3) |> kable() +``` + +With the exception of the FOMC pathway fit with constant variance, all updated +fits completed successfully. However, the Fisher Information Matrix for the +fixed effects (Fth) could not be inverted, so no confidence intervals for the +optimised parameters are available. + +```{r dependson = "f-saem-3"} +illparms(f_saem_3) |> kable() +``` + +```{r dependson = "f-saem-3"} +anova(f_saem_3) |> kable(digits = 1) +``` + +While the AIC and BIC values of the best fit (DFOP pathway fit with +two-component error) are lower than in the previous fits with the alternative +pathway, the practical value of these refined evaluations is limited +as no confidence intervals are obtained. + +\clearpage + +# Conclusion + +It was demonstrated that a relatively complex transformation pathway with +parallel formation of two primary metabolites and one secondary metabolite +can be fitted even if the data in the individual datasets are quite different +and partly only cover the formation phase. + +The run times of the pathway fits were several hours, limiting the +practical feasibility of iterative refinements based on ill-defined +parameters and of alternative checks of parameter identifiability +based on multistart runs. + +# Acknowledgements + +The helpful comments by Janina Wöltjen of the German Environment Agency +are gratefully acknowledged. + +\clearpage + +# Appendix + +## Plots of fits that were not refined further + +```{r fig.cap = "SFO pathway fit with two-component error", dependson = "f-saem-1", fig.height = 8} +plot(f_saem_1[["sfo_path_1", "tc"]]) +``` + +\clearpage + +```{r fig.cap = "FOMC pathway fit with two-component error", dependson = "f-saem-1", fig.height = 8} +plot(f_saem_1[["fomc_path_1", "tc"]]) +``` + +\clearpage + + +```{r fig.cap = "HS pathway fit with two-component error", dependson = "f-saem-1", fig.height = 8} +plot(f_saem_1[["sforb_path_1", "tc"]]) +``` + +\clearpage + + +## Hierarchical fit listings + +### Pathway 1 + +```{r listings-1, results = "asis", echo = FALSE, cache = FALSE} +errmods <- c(const = "constant variance", tc = "two-component error") +degmods <- c( + sfo_path_1 = "SFO path 1", + fomc_path_1 = "FOMC path 1", + dfop_path_1 = "DFOP path 1", + sforb_path_1 = "SFORB path 1", + hs_path_1 = "HS path 1") +for (deg_mod in rownames(f_saem_1)) { + for (err_mod in c("const", "tc")) { + fit <- f_saem_1[[deg_mod, err_mod]] + if (!inherits(fit$so, "try-error")) { + caption <- paste("Hierarchical", degmods[deg_mod], "fit with", errmods[err_mod]) + summary_listing(fit, caption) + } + } +} +``` + +### Pathway 2 + +```{r listings-2, results = "asis", echo = FALSE, cache = FALSE} +degmods <- c( + fomc_path_2 = "FOMC path 2", + dfop_path_2 = "DFOP path 2", + sforb_path_2 = "SFORB path 2") +for (deg_mod in rownames(f_saem_2)) { + for (err_mod in c("const", "tc")) { + fit <- f_saem_2[[deg_mod, err_mod]] + if (!inherits(fit$so, "try-error")) { + caption <- paste("Hierarchical", degmods[deg_mod], "fit with", errmods[err_mod]) + summary_listing(fit, caption) + } + } +} +``` + +### Pathway 2, refined fits + +```{r listings-3, results = "asis", echo = FALSE, cache = FALSE} +degmods <- c( + fomc_path_2 = "FOMC path 2", + dfop_path_2 = "DFOP path 2", + sforb_path_2 = "SFORB path 2") +for (deg_mod in rownames(f_saem_3)) { + for (err_mod in c("const", "tc")) { + fit <- f_saem_3[[deg_mod, err_mod]] + if (!inherits(fit$so, "try-error")) { + caption <- paste("Hierarchical", degmods[deg_mod], "fit with reduced random effects,", errmods[err_mod]) + summary_listing(fit, caption) + } + } +} +``` + +## Session info + +```{r, echo = FALSE, cache = FALSE} +parallel::stopCluster(cl = cl) +sessionInfo() +``` + +## Hardware info + +```{r, echo = FALSE} +if(!inherits(try(cpuinfo <- readLines("/proc/cpuinfo")), "try-error")) { + cat(gsub("model name\t: ", "CPU model: ", cpuinfo[grep("model name", cpuinfo)[1]])) +} +if(!inherits(try(meminfo <- readLines("/proc/meminfo")), "try-error")) { + cat(gsub("model name\t: ", "System memory: ", meminfo[grep("MemTotal", meminfo)[1]])) +} +``` -- cgit v1.2.1