diff options
author | Johannes Ranke <jranke@uni-bremen.de> | 2023-02-13 05:19:08 +0100 |
---|---|---|
committer | Johannes Ranke <jranke@uni-bremen.de> | 2023-02-13 05:19:08 +0100 |
commit | 8d1a84ac2190538ed3bac53a303064e281595868 (patch) | |
tree | acb894d85ab7ec87c4911c355a5264a77e08e34b /inst | |
parent | 51d63256a7b3020ee11931d61b4db97b9ded02c0 (diff) | |
parent | 4200e566ad2600f56bc3987669aeab88582139eb (diff) |
Merge branch 'main' into custom_lsoda_call
Diffstat (limited to 'inst')
-rw-r--r-- | inst/dataset_generation/ds_mixed.R | 105 | ||||
-rw-r--r-- | inst/rmarkdown/templates/hierarchical_kinetics/skeleton/header.tex | 1 | ||||
-rw-r--r-- | inst/rmarkdown/templates/hierarchical_kinetics/skeleton/skeleton.Rmd | 314 | ||||
-rw-r--r-- | inst/rmarkdown/templates/hierarchical_kinetics/template.yaml | 3 | ||||
-rw-r--r-- | inst/testdata/cyantraniliprole_soil_efsa_2014.xlsx | bin | 0 -> 35878 bytes | |||
-rw-r--r-- | inst/testdata/lambda-cyhalothrin_soil_efsa_2014.xlsx | bin | 0 -> 36231 bytes |
6 files changed, 423 insertions, 0 deletions
diff --git a/inst/dataset_generation/ds_mixed.R b/inst/dataset_generation/ds_mixed.R new file mode 100644 index 00000000..f2ae6e7e --- /dev/null +++ b/inst/dataset_generation/ds_mixed.R @@ -0,0 +1,105 @@ +# Synthetic data for hierarchical kinetic models +# Refactored version of the code previously in tests/testthat/setup_script.R +# The number of datasets was 3 for FOMC, and 10 for HS in that script, now it +# is always 15 for consistency + +library(mkin) # We use mkinmod and mkinpredict +sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) +n <- 15 +log_sd <- 0.3 +err_1 = list(const = 1, prop = 0.05) +tc <- function(value) sigma_twocomp(value, err_1$const, err_1$prop) +const <- function(value) 2 + +set.seed(123456) +SFO <- mkinmod(parent = mkinsub("SFO")) +sfo_pop <- list(parent_0 = 100, k_parent = 0.03) +sfo_parms <- as.matrix(data.frame( + k_parent = rlnorm(n, log(sfo_pop$k_parent), log_sd))) +set.seed(123456) +ds_sfo <- lapply(1:n, function(i) { + ds_mean <- mkinpredict(SFO, sfo_parms[i, ], + c(parent = sfo_pop$parent_0), sampling_times) + add_err(ds_mean, tc, n = 1)[[1]] +}) +attr(ds_sfo, "pop") <- sfo_pop +attr(ds_sfo, "parms") <- sfo_parms + +set.seed(123456) +FOMC <- mkinmod(parent = mkinsub("FOMC")) +fomc_pop <- list(parent_0 = 100, alpha = 2, beta = 8) +fomc_parms <- as.matrix(data.frame( + alpha = rlnorm(n, log(fomc_pop$alpha), 0.4), + beta = rlnorm(n, log(fomc_pop$beta), 0.2))) +set.seed(123456) +ds_fomc <- lapply(1:n, function(i) { + ds_mean <- mkinpredict(FOMC, fomc_parms[i, ], + c(parent = fomc_pop$parent_0), sampling_times) + add_err(ds_mean, tc, n = 1)[[1]] +}) +attr(ds_fomc, "pop") <- fomc_pop +attr(ds_fomc, "parms") <- fomc_parms + +set.seed(123456) +DFOP <- mkinmod(parent = mkinsub("DFOP")) +dfop_pop <- list(parent_0 = 100, k1 = 0.06, k2 = 0.015, g = 0.4) +dfop_parms <- as.matrix(data.frame( + k1 = rlnorm(n, log(dfop_pop$k1), log_sd), + k2 = rlnorm(n, log(dfop_pop$k2), log_sd), + g = plogis(rnorm(n, qlogis(dfop_pop$g), log_sd)))) 
+set.seed(123456) +ds_dfop <- lapply(1:n, function(i) { + ds_mean <- mkinpredict(DFOP, dfop_parms[i, ], + c(parent = dfop_pop$parent_0), sampling_times) + add_err(ds_mean, tc, n = 1)[[1]] +}) +attr(ds_dfop, "pop") <- dfop_pop +attr(ds_dfop, "parms") <- dfop_parms + +set.seed(123456) +HS <- mkinmod(parent = mkinsub("HS")) +hs_pop <- list(parent_0 = 100, k1 = 0.08, k2 = 0.01, tb = 15) +hs_parms <- as.matrix(data.frame( + k1 = rlnorm(n, log(hs_pop$k1), log_sd), + k2 = rlnorm(n, log(hs_pop$k2), log_sd), + tb = rlnorm(n, log(hs_pop$tb), 0.1))) +set.seed(123456) +ds_hs <- lapply(1:n, function(i) { + ds_mean <- mkinpredict(HS, hs_parms[i, ], + c(parent = hs_pop$parent_0), sampling_times) + add_err(ds_mean, const, n = 1)[[1]] +}) +attr(ds_hs, "pop") <- hs_pop +attr(ds_hs, "parms") <- hs_parms + +set.seed(123456) +DFOP_SFO <- mkinmod( + parent = mkinsub("DFOP", "m1"), + m1 = mkinsub("SFO"), + quiet = TRUE) +dfop_sfo_pop <- list(parent_0 = 100, + k_m1 = 0.007, f_parent_to_m1 = 0.5, + k1 = 0.1, k2 = 0.02, g = 0.5) +dfop_sfo_parms <- as.matrix(data.frame( + k1 = rlnorm(n, log(dfop_sfo_pop$k1), log_sd), + k2 = rlnorm(n, log(dfop_sfo_pop$k2), log_sd), + g = plogis(rnorm(n, qlogis(dfop_sfo_pop$g), log_sd)), + f_parent_to_m1 = plogis(rnorm(n, + qlogis(dfop_sfo_pop$f_parent_to_m1), log_sd)), + k_m1 = rlnorm(n, log(dfop_sfo_pop$k_m1), log_sd))) +ds_dfop_sfo_mean <- lapply(1:n, + function(i) { + mkinpredict(DFOP_SFO, dfop_sfo_parms[i, ], + c(parent = dfop_sfo_pop$parent_0, m1 = 0), sampling_times) + } +) +set.seed(123456) +ds_dfop_sfo <- lapply(ds_dfop_sfo_mean, function(ds) { + add_err(ds, + sdfunc = function(value) sqrt(err_1$const^2 + value^2 * err_1$prop^2), + n = 1, secondary = "m1")[[1]] +}) +attr(ds_dfop_sfo, "pop") <- dfop_sfo_pop +attr(ds_dfop_sfo, "parms") <- dfop_sfo_parms + +#save(ds_sfo, ds_fomc, ds_dfop, ds_hs, ds_dfop_sfo, file = "data/ds_mixed.rda", version = 2) diff --git a/inst/rmarkdown/templates/hierarchical_kinetics/skeleton/header.tex 
b/inst/rmarkdown/templates/hierarchical_kinetics/skeleton/header.tex new file mode 100644 index 00000000..a2b7ce83 --- /dev/null +++ b/inst/rmarkdown/templates/hierarchical_kinetics/skeleton/header.tex @@ -0,0 +1 @@ +\definecolor{shadecolor}{RGB}{248,248,248} diff --git a/inst/rmarkdown/templates/hierarchical_kinetics/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/hierarchical_kinetics/skeleton/skeleton.Rmd new file mode 100644 index 00000000..38a6bd20 --- /dev/null +++ b/inst/rmarkdown/templates/hierarchical_kinetics/skeleton/skeleton.Rmd @@ -0,0 +1,314 @@ +--- +title: "Hierarchical kinetic modelling of degradation data" +author: +date: +output: mkin::hierarchical_kinetics +geometry: margin=2cm +--- + +\clearpage + +# Setup + +```{r packages, cache = FALSE, message = FALSE} +library(mkin) +library(knitr) +library(saemix) +library(parallel) +library(readxl) +``` + +```{r n_cores, cache = FALSE} +n_cores <- detectCores() + +if (Sys.info()["sysname"] == "Windows") { + cl <- makePSOCKcluster(n_cores) +} else { + cl <- makeForkCluster(n_cores) +} +``` + +\clearpage + +# Introduction + +This report shows hierarchical kinetic modelling for ... +The data were obtained from ... + +```{r ds} +data_path <- system.file( + "testdata", "lambda-cyhalothrin_soil_efsa_2014.xlsx", + package = "mkin") +ds <- read_spreadsheet(data_path, valid_datasets = c(1:4, 7:13)) +covariates <- attr(ds, "covariates") +``` + +The covariate data are shown below. + +```{r results = "asis", dependson = "ds", echo = FALSE} +kable(covariates, caption = "Covariate data for all datasets") +``` + +\clearpage + +The datasets with the residue time series are shown in the tables below. Please +refer to the spreadsheet for details like data sources, treatment of values +below reporting limits and time step normalisation factors. 
+ +```{r results = "asis", dependson = "ds", echo = FALSE} +for (ds_name in names(ds)) { + print( + kable(mkin_long_to_wide(ds[[ds_name]]), + caption = paste("Dataset", ds_name), + booktabs = TRUE, row.names = FALSE)) + cat("\n\\clearpage\n") +} +``` + +# Parent only evaluations + +The following code performs separate fits of the candidate degradation models +to all datasets using constant variance and the two-component error model. + +```{r parent-sep, dependson = "ds"} +parent_deg_mods <- c("SFO", "FOMC", "DFOP", "SFORB") +errmods <- c(const = "constant variance", tc = "two-component error") +parent_sep_const <- mmkin( + parent_deg_mods, ds, + error_model = "const", + cluster = cl, quiet = TRUE) +parent_sep_tc <- update(parent_sep_const, error_model = "tc") +``` + +To select the parent model, the corresponding hierarchical fits are performed below. + +```{r parent-mhmkin, dependson = "parent-sep"} +parent_mhmkin <- mhmkin(list(parent_sep_const, parent_sep_tc), cluster = cl) +status(parent_mhmkin) |> kable() +``` + +All fits terminate without errors (status OK). The check for ill-defined +parameters shows that not all random effect parameters can be robustly +quantified. + +```{r dependson = "parent-mhmkin"} +illparms(parent_mhmkin) |> kable() +``` + +Therefore, the fits are updated, excluding random effects that were +ill-defined according to the `illparms` function. The status of the fits +is checked. + +```{r parent-mhmkin-refined} +parent_mhmkin_refined <- update(parent_mhmkin, + no_random_effect = illparms(parent_mhmkin)) +status(parent_mhmkin_refined) |> kable() +``` + +Also, it is checked if the AIC values of the refined fits are actually smaller +than the AIC values of the original fits. + +```{r dependson = "parent-mhmkin-refined"} +(AIC(parent_mhmkin_refined) < AIC(parent_mhmkin)) |> kable() +``` + +From the refined fits, the most suitable model is selected using the AIC. 
+ +```{r parent-best, dependson = "parent-mhmkin"} +aic_parent <- AIC(parent_mhmkin_refined) +min_aic <- which(aic_parent == min(aic_parent), arr.ind = TRUE) +best_degmod_parent <- rownames(aic_parent)[min_aic[1]] +best_errmod_parent <- colnames(aic_parent)[min_aic[2]] +anova(parent_mhmkin_refined) |> kable(digits = 1) +parent_best <- parent_mhmkin_refined[[best_degmod_parent, best_errmod_parent]] +``` + +Based on the AIC, the combination of the `r best_degmod_parent` degradation +model with the error model `r errmods[best_errmod_parent]` is identified to +be most suitable for the degradation of the parent. The check below +confirms that no ill-defined parameters remain for this combined model. + +```{r dependson = "parent-best"} +illparms(parent_best) +``` + +The corresponding fit is plotted below. + +```{r dependson = "parent-best"} +plot(parent_best) +``` +The fitted parameters, together with approximate confidence +intervals are listed below. + +```{r dependson = "parent-best"} +parms(parent_best, ci = TRUE) |> kable(digits = 3) +``` + +To investigate a potential covariate influence on degradation parameters, a +covariate model is added to the hierarchical model for each of the degradation +parameters with well-defined random effects. Also, a version with covariate +models for both of them is fitted. + +```{r parent-best-pH} +parent_best_pH_1 <- update(parent_best, covariates = covariates, + covariate_models = list(log_k_lambda_free ~ pH)) +parent_best_pH_2 <- update(parent_best, covariates = covariates, + covariate_models = list(log_k_lambda_bound_free ~ pH)) +parent_best_pH_3 <- update(parent_best, covariates = covariates, + covariate_models = list(log_k_lambda_free ~ pH, log_k_lambda_bound_free ~ pH)) +``` + +The resulting models are compared. 
+ +```{r dependson = "parent-best-pH"} +anova(parent_best, parent_best_pH_1, parent_best_pH_2, parent_best_pH_3) |> + kable(digits = 1) +``` + +The model fit with the lowest AIC is the one with a pH correlation of the +desorption rate constant `k_lambda_bound_free`. Plot and parameter listing +of this fit are shown below. Also, it is confirmed that no ill-defined +variance parameters are found. + +```{r dependson = "parent-best-pH"} +plot(parent_best_pH_2) +``` + +```{r dependson = "parent-best-pH"} +illparms(parent_best_pH_2) +parms(parent_best_pH_2, ci = TRUE) |> kable(digits = 3) +``` + +\clearpage + +# Pathway fits + +As an example of a pathway fit, a model with SFORB for the parent compound and +parallel formation of two metabolites is set up. + +```{r path-1-degmod} +if (!dir.exists("dlls")) dir.create("dlls") + +m_sforb_sfo2 = mkinmod( + lambda = mkinsub("SFORB", to = c("c_V", "c_XV")), + c_V = mkinsub("SFO"), + c_XV = mkinsub("SFO"), + name = "sforb_sfo2", + dll_dir = "dlls", + overwrite = TRUE, quiet = TRUE +) +``` + +Separate evaluations of all datasets are performed with constant variance +and using two-component error. + +```{r path-1-sep, dependson = c("path-1-degmod", "ds")} +sforb_sep_const <- mmkin(list(sforb_path = m_sforb_sfo2), ds, + cluster = cl, quiet = TRUE) +sforb_sep_tc <- update(sforb_sep_const, error_model = "tc") +``` + +The separate fits with constant variance are plotted. + +```{r dependson = "path-1-sep", fig.height = 9} +plot(mixed(sforb_sep_const)) +``` + +The two corresponding hierarchical fits, with the random effects for the parent +degradation parameters excluded as discussed above, and including the covariate +model that was identified for the parent degradation, are attempted below. 
+ +```{r path-1, dependson = "path-1-sep"} +path_1 <- mhmkin(list(sforb_sep_const, sforb_sep_tc), + no_random_effect = c("lambda_free_0", "log_k_lambda_free_bound"), + covariates = covariates, covariate_models = list(log_k_lambda_bound_free ~ pH), + cluster = cl) +``` + +```{r dependson = "path-1"} +status(path_1) |> kable() +``` + +The status information shows that both fits were successfully completed. + +```{r dependson = "path-1"} +anova(path_1) |> kable(digits = 1) +``` +Model comparison shows that the two-component error model provides a much +better fit. + +```{r dependson = "path-1"} +illparms(path_1[["sforb_path", "tc"]]) +``` + +Two ill-defined variance components are found. Therefore, the fit is +repeated with the corresponding random effects removed. + +```{r path-1-refined, dependson = "path-1"} +path_1_refined <- update(path_1[["sforb_path", "tc"]], + no_random_effect = c("lambda_free_0", "log_k_lambda_free_bound", + "log_k_c_XV", "f_lambda_ilr_2")) +``` + +The empty output of the illparms function indicates that there are no +ill-defined parameters remaining in the refined fit. + +```{r dependson = "path-1-refined"} +illparms(path_1_refined) +``` + +Below, the refined fit is plotted and the fitted parameters are shown together +with their 95% confidence intervals. 
+ +```{r dependson = "path-1-refined", fig.height = 9} +plot(path_1_refined) +``` + +```{r dependson = "path-1-refined", fig.height = 9} +parms(path_1_refined, ci = TRUE) |> kable(digits = 3) +``` + +\clearpage + +# Appendix + +## Listings of initial parent fits + +```{r listings-parent, results = "asis", echo = FALSE, dependson = "parent-mhmkin"} +for (deg_mod in parent_deg_mods) { + for (err_mod in c("const", "tc")) { + caption <- paste("Hierarchical", deg_mod, "fit with", errmods[err_mod]) + tex_listing(parent_mhmkin[[deg_mod, err_mod]], caption) + } +} +``` + +## Listings of refined parent fits + +```{r listings-parent-refined, results = "asis", echo = FALSE, dependson = "parent-mhmkin-refined"} +for (deg_mod in parent_deg_mods) { + for (err_mod in c("const", "tc")) { + caption <- paste("Refined hierarchical", deg_mod, "fit with", errmods[err_mod]) + tex_listing(parent_mhmkin_refined[[deg_mod, err_mod]], caption) + } +} +``` + +## Listings of pathway fits + +```{r listings-path-1, results = "asis", echo = FALSE, dependson = "path-1-refined"} +tex_listing(path_1[["sforb_path", "const"]], + caption = "Hierarchical fit of SFORB-SFO2 with constant variance") +tex_listing(path_1[["sforb_path", "tc"]], + caption = "Hierarchical fit of SFORB-SFO2 with two-component error") +tex_listing(path_1_refined, + caption = "Refined hierarchical fit of SFORB-SFO2 with two-component error") +``` + +## Session info + +```{r echo = FALSE, cache = FALSE} +parallel::stopCluster(cl) +sessionInfo() +``` + diff --git a/inst/rmarkdown/templates/hierarchical_kinetics/template.yaml b/inst/rmarkdown/templates/hierarchical_kinetics/template.yaml new file mode 100644 index 00000000..d8ab6a4d --- /dev/null +++ b/inst/rmarkdown/templates/hierarchical_kinetics/template.yaml @@ -0,0 +1,3 @@ +name: Hierarchical kinetics +description: Hierarchical kinetic modelling of degradation data +create_dir: true diff --git a/inst/testdata/cyantraniliprole_soil_efsa_2014.xlsx 
b/inst/testdata/cyantraniliprole_soil_efsa_2014.xlsx Binary files differ new file mode 100644 index 00000000..3252fdf1 --- /dev/null +++ b/inst/testdata/cyantraniliprole_soil_efsa_2014.xlsx diff --git a/inst/testdata/lambda-cyhalothrin_soil_efsa_2014.xlsx b/inst/testdata/lambda-cyhalothrin_soil_efsa_2014.xlsx Binary files differ new file mode 100644 index 00000000..32fc049f --- /dev/null +++ b/inst/testdata/lambda-cyhalothrin_soil_efsa_2014.xlsx |