From 24eb77216700cf8b2f2bde3abad84c1f83f9e32a Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Mon, 9 Jan 2023 06:22:04 +0100 Subject: Prebuilt PDF vignettes, summary_listing --- vignettes/prebuilt/2022_dmta_pathway.rmd | 426 +++++++++++++++++++++++++++++++ 1 file changed, 426 insertions(+) create mode 100644 vignettes/prebuilt/2022_dmta_pathway.rmd (limited to 'vignettes/prebuilt/2022_dmta_pathway.rmd') diff --git a/vignettes/prebuilt/2022_dmta_pathway.rmd b/vignettes/prebuilt/2022_dmta_pathway.rmd new file mode 100644 index 00000000..ff2b527c --- /dev/null +++ b/vignettes/prebuilt/2022_dmta_pathway.rmd @@ -0,0 +1,426 @@ +--- +title: "Testing hierarchical pathway kinetics with residue data on dimethenamid and dimethenamid-P" +author: Johannes Ranke +date: Last change on 8 January 2023, last compiled on `r format(Sys.time(), "%e %B %Y")` +geometry: margin=2cm +bibliography: references.bib +toc: true +output: + pdf_document: + extra_dependencies: ["float", "listing"] +--- + +```{r setup, echo = FALSE, cache = FALSE} +options(width = 80) # For summary listings +knitr::opts_chunk$set( + comment = "", tidy = FALSE, cache = TRUE, fig.pos = "H", fig.align = "center" +) +``` + +\clearpage + +# Introduction + +The purpose of this document is to test demonstrate how nonlinear hierarchical +models (NLHM) based on the parent degradation models SFO, FOMC, DFOP and HS, +with parallel formation of two or more metabolites can be fitted with the mkin package. + +It was assembled in the course of work package 1.2 of Project Number 173340 +(Application of nonlinear hierarchical models to the kinetic evaluation of +chemical degradation data) of the German Environment Agency carried out in 2022 +and 2023. + +The mkin package is used in version `r packageVersion("mkin")`, which is currently +under development. It contains the test data, and the functions used in the +evaluations. The `saemix` package is used as a backend for fitting the NLHM, +but is also loaded to make the convergence plot function available. + +This document is processed with the `knitr` package, which also provides the +`kable` function that is used to improve the display of tabular data in R +markdown documents. For parallel processing, the `parallel` package is used. + +```{r, packages, cache = FALSE, message = FALSE} +library(mkin) +library(knitr) +library(saemix) +library(parallel) +n_cores <- detectCores() +if (Sys.info()["sysname"] == "Windows") { + cl <- makePSOCKcluster(n_cores) +} else { + cl <- makeForkCluster(n_cores) +} +``` + +\clearpage + +# Data + +The test data are available in the mkin package as an object of class `mkindsg` +(mkin dataset group) under the identifier `dimethenamid_2018`. The following +preprocessing steps are done in this document. + +- The data available for the enantiomer dimethenamid-P (DMTAP) are renamed + to have the same substance name as the data for the racemic mixture + dimethenamid (DMTA). The reason for this is that no difference between their + degradation behaviour was identified in the EU risk assessment. +- Unnecessary columns are discarded +- The observation times of each dataset are multiplied with the + corresponding normalisation factor also available in the dataset, in order to + make it possible to describe all datasets with a single set of parameters + that are independent of temperature +- Finally, datasets observed in the same soil (`Elliot 1` and `Elliot 2`) are + combined, resulting in dimethenamid (DMTA) data from six soils. + +The following commented R code performs this preprocessing. + +```{r, data} +# Apply a function to each of the seven datasets in the mkindsg object to create a list +dmta_ds <- lapply(1:7, function(i) { + ds_i <- dimethenamid_2018$ds[[i]]$data # Get a dataset + ds_i[ds_i$name == "DMTAP", "name"] <- "DMTA" # Rename DMTAP to DMTA + ds_i <- subset(ds_i, select = c("name", "time", "value")) # Select data + ds_i$time <- ds_i$time * dimethenamid_2018$f_time_norm[i] # Normalise time + ds_i # Return the dataset +}) + +# Use dataset titles as names for the list elements +names(dmta_ds) <- sapply(dimethenamid_2018$ds, function(ds) ds$title) + +# Combine data for Elliot soil to obtain a named list with six elements +dmta_ds[["Elliot"]] <- rbind(dmta_ds[["Elliot 1"]], dmta_ds[["Elliot 2"]]) # +dmta_ds[["Elliot 1"]] <- NULL +dmta_ds[["Elliot 2"]] <- NULL +``` + +\clearpage + +The following tables show the `r length(dmta_ds)` datasets. + +```{r show-data, dependson = "data", results = "asis"} +for (ds_name in names(dmta_ds)) { + print( + kable(mkin_long_to_wide(dmta_ds[[ds_name]]), + caption = paste("Dataset", ds_name), + booktabs = TRUE, row.names = FALSE)) + cat("\n\\clearpage\n") +} +``` + +# Separate evaluations + +As a first step to obtain suitable starting parameters for the NLHM fits, we do +separate fits of several variants of the pathway model used previously +[@ranke2021], varying the kinetic model for the parent compound. Because the +SFORB model often provides faster convergence than the DFOP model, and can +sometimes be fitted where the DFOP model results in errors, it is included in +the set of parent models tested here. + +```{r, sep-1-const, dependson = "data"} +if (!dir.exists("dmta_dlls")) dir.create("dmta_dlls") +m_sfo_path_1 <- mkinmod( + DMTA = mkinsub("SFO", c("M23", "M27", "M31")), + M23 = mkinsub("SFO"), + M27 = mkinsub("SFO"), + M31 = mkinsub("SFO", "M27", sink = FALSE), + name = "m_sfo_path", dll_dir = "dmta_dlls", + unload = TRUE, overwrite = TRUE, + quiet = TRUE +) +m_fomc_path_1 <- mkinmod( + DMTA = mkinsub("FOMC", c("M23", "M27", "M31")), + M23 = mkinsub("SFO"), + M27 = mkinsub("SFO"), + M31 = mkinsub("SFO", "M27", sink = FALSE), + name = "m_fomc_path", dll_dir = "dmta_dlls", + unload = TRUE, overwrite = TRUE, + quiet = TRUE +) +m_dfop_path_1 <- mkinmod( + DMTA = mkinsub("DFOP", c("M23", "M27", "M31")), + M23 = mkinsub("SFO"), + M27 = mkinsub("SFO"), + M31 = mkinsub("SFO", "M27", sink = FALSE), + name = "m_dfop_path", dll_dir = "dmta_dlls", + unload = TRUE, overwrite = TRUE, + quiet = TRUE +) +m_sforb_path_1 <- mkinmod( + DMTA = mkinsub("SFORB", c("M23", "M27", "M31")), + M23 = mkinsub("SFO"), + M27 = mkinsub("SFO"), + M31 = mkinsub("SFO", "M27", sink = FALSE), + name = "m_sforb_path", dll_dir = "dmta_dlls", + unload = TRUE, overwrite = TRUE, + quiet = TRUE +) +m_hs_path_1 <- mkinmod( + DMTA = mkinsub("HS", c("M23", "M27", "M31")), + M23 = mkinsub("SFO"), + M27 = mkinsub("SFO"), + M31 = mkinsub("SFO", "M27", sink = FALSE), + name = "m_hs_path", dll_dir = "dmta_dlls", + unload = TRUE, overwrite = TRUE, + quiet = TRUE +) +deg_mods_1 <- list( + sfo_path_1 = m_sfo_path_1, + fomc_path_1 = m_fomc_path_1, + dfop_path_1 = m_dfop_path_1, + sforb_path_1 = m_sforb_path_1, + hs_path_1 = m_hs_path_1) + +sep_1_const <- mmkin( + deg_mods_1, + dmta_ds, + error_model = "const", + quiet = TRUE) + +status(sep_1_const) |> kable() +``` + +All separate pathway fits with SFO or FOMC for the parent and constant variance +converged (status OK). Most fits with DFOP or SFORB for the parent converged +as well. The fits with HS for the parent did not converge with default settings. + +```{r, sep-1-tc, dependson = "sep-1-const"} +sep_1_tc <- update(sep_1_const, error_model = "tc") +status(sep_1_tc) |> kable() +``` + +With the two-component error model, the set of fits with convergence problems +is slightly different, with convergence problems appearing for different data +sets when applying the DFOP and SFORB model and some additional convergence +problems when using the FOMC model for the parent. + +\clearpage + +# Hierarchichal model fits + +The following code fits two sets of the corresponding hierarchical models to +the data, one assuming constant variance, and one assuming two-component error. + +```{r saem-1, dependson = c("sep-1-const", "sep-1-tc")} +saem_1 <- mhmkin(list(sep_1_const, sep_1_tc)) +``` +The run time for these fits was around two hours on five year old hardware. After +a recent hardware upgrade these fits complete in less than twenty minutes. + +```{r, saem-1-status, dependson = "saem-1"} +status(saem_1) |> kable() +``` + +According to the `status` function, all fits terminated successfully. + +```{r saem-1-anova, dependson = "saem-1"} +anova(saem_1) |> kable(digits = 1) +``` + +When the goodness-of-fit of the models is compared, a warning is obtained, +indicating that the likelihood of the pathway fit with SFORB for the parent +compound and constant variance could not be calculated with importance sampling +(method 'is'). As this is the default method on which all AIC and BIC +comparisons are based, this variant is not included in the model comparison +table. Comparing the goodness-of-fit of the remaining models, HS model model +with two-component error provides the best fit. However, for batch experiments +performed with constant conditions such as the experiments evaluated here, +there is no reason to assume a discontinuity, so the SFORB model is +preferable from a mechanistic viewpoint. In addition, the information criteria +AIC and BIC are very similar for HS and SFORB. Therefore, the SFORB model is +selected here for further refinements. + +\clearpage + +## Parameter identifiability based on the Fisher Information Matrix + +Using the `illparms` function, ill-defined statistical model parameters such as +standard deviations of the degradation parameters in the population and error +model parameters can be found. + +```{r saem-1-illparms, dependson = "saem-1"} +illparms(saem_1) |> kable() +``` + +When using constant variance, no ill-defined variance parameters are identified +with the `illparms` function in any of the degradation models. When using +the two-component error model, there is one ill-defined variance parameter +in all variants except for the variant using DFOP for the parent compound. + +For the selected combination of the SFORB pathway model with two-component +error, the random effect for the rate constant from reversibly bound DMTA to +the free DMTA (`k_DMTA_bound_free`) is not well-defined. Therefore, the fit is +updated without assuming a random effect for this parameter. + +```{r saem-sforb-path-1-tc-reduced, dependson = "saem-1"} +saem_sforb_path_1_tc_reduced <- update(saem_1[["sforb_path_1", "tc"]], + no_random_effect = "log_k_DMTA_bound_free") +illparms(saem_sforb_path_1_tc_reduced) +``` + +As expected, no ill-defined parameters remain. The model comparison below shows +that the reduced model is preferable. + +```{r saem-sforb-path-1-tc-reduced-anova, dependson = "saem-sforb-path-1-tc-reduced"} +anova(saem_1[["sforb_path_1", "tc"]], saem_sforb_path_1_tc_reduced) |> kable(digits = 1) +``` + +The convergence plot of the refined fit is shown below. + +```{r saem-sforb-path-1-tc-reduced-convergence, dependson = "saem-sforb-path-1-tc-reduced", fig.height = 12} +plot(saem_sforb_path_1_tc_reduced$so, plot.type = "convergence") +``` + +For some parameters, for example for `f_DMTA_ilr_1` and `f_DMTA_ilr_2`, i.e. +for two of the parameters determining the formation fractions of the parallel +formation of the three metabolites, some movement of the parameters is still +visible in the second phase of the algorithm. However, the amplitude of this +movement is in the range of the amplitude towards the end of the first phase. +Therefore, it is likely that an increase in iterations would not improve the +parameter estimates very much, and it is proposed that the fit is acceptable. +No numeric convergence criterion is implemented in saemix. + +\clearpage + +## Alternative check of parameter identifiability + +As an alternative check of parameter identifiability [@duchesne_2021], +multistart runs were performed on the basis of the refined fit shown above. + +```{r saem-sforb-multistart, dependson = "saem-sforb-path-1-tc-reduced"} +saem_sforb_path_1_tc_reduced_multi <- multistart(saem_sforb_path_1_tc_reduced, + n = 32, cores = 10) +``` + +```{r dependson = "saem-sforb-multistart"} +print(saem_sforb_path_1_tc_reduced_multi) +``` + +Out of the 32 fits that were initiated, only 17 terminated without an error. +The reason for this is that the wide variation of starting parameters in combination +with the parameter variation that is used in the SAEM algorithm leads to +parameter combinations for the degradation model that the numerical integration +routine cannot cope with. Because of this variation of initial parameters, +some of the model fits take up to two times more time than the original fit. + +```{r dependson = "saem-sforb-multistart", fig.cap = "Parameter boxplots for the multistart runs that succeeded", fig.height = 6, fig.width = 10} +par(mar = c(12.1, 4.1, 2.1, 2.1)) +parplot(saem_sforb_path_1_tc_reduced_multi, ylim = c(0.5, 2), las = 2) +``` + +However, visual analysis of the boxplot of the parameters obtained in the +successful fits confirms that the results are sufficiently independent of the +starting parameters, and there are no remaining ill-defined parameters. + +\clearpage + + +# Plots of selected fits + +The SFORB pathway fits with full and reduced parameter distribution model are +shown below. + +```{r fig.cap = "SFORB pathway fit with two-component error", dependson = "saem-1", fig.height = 8} +plot(saem_1[["sforb_path_1", "tc"]]) +``` + +\clearpage + +```{r fig.cap = "SFORB pathway fit with two-component error, reduced parameter model", dependson = "saem-sforb-path-1-tc-reduced", fig.height = 8} +plot(saem_sforb_path_1_tc_reduced) +``` + +Plots of the remaining fits and listings for all successful fits are shown in +the Appendix. + + +# Conclusions + +Pathway fits with SFO, FOMC, DFOP, SFORB and HS models for the parent compound +could be successfully performed. + +\clearpage + +# Acknowledgements + +The helpful comments by Janina Wöltjen of the German Environment Agency +on earlier versions of this document are gratefully acknowledged. + +# References + +\vspace{1em} + +::: {#refs} +::: + +\clearpage + +# Appendix + +## Plots of hierarchical fits not selected for refinement + +```{r fig.cap = "SFO pathway fit with two-component error", dependson = "saem-1", fig.height = 8} +plot(saem_1[["sfo_path_1", "tc"]]) +``` + +\clearpage + +```{r fig.cap = "FOMC pathway fit with two-component error", dependson = "saem-1", fig.height = 8} +plot(saem_1[["fomc_path_1", "tc"]]) +``` + +\clearpage + + +```{r fig.cap = "HS pathway fit with two-component error", dependson = "saem-1", fig.height = 8} +plot(saem_1[["sforb_path_1", "tc"]]) +``` + +\clearpage + +## Hierarchical model fit listings + +### Fits with random effects for all degradation parameters + +```{r listings-1, results = "asis", echo = FALSE} +errmods <- c(const = "constant variance", tc = "two-component error") +degmods <- c( + sfo_path_1 = "SFO path 1", + fomc_path_1 = "FOMC path 1", + dfop_path_1 = "DFOP path 1", + sforb_path_1 = "SFORB path 1", + hs_path_1 = "HS path 1") +for (deg_mod in rownames(saem_1)) { + for (err_mod in c("const", "tc")) { + fit <- saem_1[[deg_mod, err_mod]] + if (!inherits(fit$so, "try-error")) { + caption <- paste("Hierarchical", degmods[deg_mod], "fit with", errmods[err_mod]) + tex_listing(fit, caption) + } + } +} +``` + +### Improved fit of the SFORB pathway model with two-component error + +```{r listings-2, results = "asis", echo = FALSE, dependson = "listings-1"} +caption <- paste("Hierarchical SFORB pathway fit with two-component error") +tex_listing(saem_sforb_path_1_tc_reduced, caption) +``` + +## Session info + +```{r, echo = FALSE} +parallel::stopCluster(cl) +sessionInfo() +``` + +## Hardware info + +```{r, echo = FALSE} +if(!inherits(try(cpuinfo <- readLines("/proc/cpuinfo")), "try-error")) { + cat(gsub("model name\t: ", "CPU model: ", cpuinfo[grep("model name", cpuinfo)[1]])) +} +if(!inherits(try(meminfo <- readLines("/proc/meminfo")), "try-error")) { + cat(gsub("model name\t: ", "System memory: ", meminfo[grep("MemTotal", meminfo)[1]])) +} +``` -- cgit v1.2.1