From 08e600c0eea6153b433659c08ea49aead5ffd932 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 26 Oct 2022 10:36:19 +0200 Subject: Update multistart example code and vignette --- vignettes/web_only/multistart.R | 36 +++++++++++++++++++++++++++++ vignettes/web_only/multistart.html | 36 +++++++++++++++++++++-------- vignettes/web_only/multistart.rmd | 47 ++++++++++++++++++++++++++++---------- 3 files changed, 97 insertions(+), 22 deletions(-) create mode 100644 vignettes/web_only/multistart.R (limited to 'vignettes') diff --git a/vignettes/web_only/multistart.R b/vignettes/web_only/multistart.R new file mode 100644 index 00000000..36612443 --- /dev/null +++ b/vignettes/web_only/multistart.R @@ -0,0 +1,36 @@ +## ----------------------------------------------------------------------------- +library(mkin) +dmta_ds <- lapply(1:7, function(i) { + ds_i <- dimethenamid_2018$ds[[i]]$data + ds_i[ds_i$name == "DMTAP", "name"] <- "DMTA" + ds_i$time <- ds_i$time * dimethenamid_2018$f_time_norm[i] + ds_i +}) +names(dmta_ds) <- sapply(dimethenamid_2018$ds, function(ds) ds$title) +dmta_ds[["Elliot"]] <- rbind(dmta_ds[["Elliot 1"]], dmta_ds[["Elliot 2"]]) +dmta_ds[["Elliot 1"]] <- dmta_ds[["Elliot 2"]] <- NULL + +## ----------------------------------------------------------------------------- +f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE) +f_saem_full <- saem(f_mmkin) +illparms(f_saem_full) + +## ----------------------------------------------------------------------------- +f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) +parhist(f_saem_full_multi) + +## ----------------------------------------------------------------------------- +f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2") +illparms(f_saem_reduced) +f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16) +parhist(f_saem_reduced_multi, lpos = "topright") + +## ----------------------------------------------------------------------------- +llhist(f_saem_reduced_multi) + +## ----------------------------------------------------------------------------- +parhist(f_saem_reduced_multi, lpos = "topright", llmin = -326, ylim = c(0.5, 2)) + +## ----------------------------------------------------------------------------- +anova(f_saem_full, best(f_saem_reduced_multi), test = TRUE) + diff --git a/vignettes/web_only/multistart.html b/vignettes/web_only/multistart.html index 00631aec..5568ad2c 100644 --- a/vignettes/web_only/multistart.html +++ b/vignettes/web_only/multistart.html @@ -364,12 +364,12 @@ pre code {

Short demo of the multistart method

Johannes Ranke

-

Last change 19 September 2022 (rebuilt 2022-09-28)

+

Last change 26 September 2022 (rebuilt 2022-10-26)

-

This is a vignette, because the multistart method does not seem to work in pkgdown example code and I wanted to show the plots in the online docs. The dimethenamid data from 2018 from seven soils is used as example data.

+

The dimethenamid data from 2018 from seven soils is used as example data in this vignette.

library(mkin)
 dmta_ds <- lapply(1:7, function(i) {
   ds_i <- dimethenamid_2018$ds[[i]]$data
@@ -383,18 +383,34 @@ dmta_ds[["Elliot 1"]] <- dmta_ds[["Elliot 2"]] <- NULL
 

First, we check the DFOP model with the two-component error model and random effects for all degradation parameters.

f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE)
 f_saem_full <- saem(f_mmkin)
-f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16)
+illparms(f_saem_full)
+
## [1] "sd(log_k2)"
+

We see that not all variability parameters are identifiable. The illparms function tells us that the confidence interval for the standard deviation of ‘log_k2’ includes zero. We check this assessment using multiple runs with different starting values.

+
f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16)
 parhist(f_saem_full_multi)
-

-

We see that not all variability parameters are identifiable, most problematic is the variance of k2. So we reduce the parameter distribution model by removing the intersoil variability for this parameter.

-
f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1)))
-f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16)
+

+

This confirms that the variance of k2 is the most problematic parameter, so we reduce the parameter distribution model by removing the intersoil variability for k2.

+
f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2")
+illparms(f_saem_reduced)
+
## character(0)
+
f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16)
 parhist(f_saem_reduced_multi, lpos = "topright")
-

+

+

The results confirm that all remaining parameters can be determined with sufficient certainty.

We can also analyse the log-likelihoods obtained in the multiple runs:

llhist(f_saem_reduced_multi)
-

-

The one run with the lower likelihood is probably responsible for the outliers in the boxplots above, and caused by some weird starting value combination. In any case, the converged values from the original fit (red circles) appear perfectly acceptable, supporting the choice of starting values made by mkin.

+

+

The parameter histograms can be further improved by excluding the result with the low likelihood.

+
parhist(f_saem_reduced_multi, lpos = "topright", llmin = -326, ylim = c(0.5, 2))
+

+

We can use the anova method to compare the models, including a likelihood ratio test if the models are nested.

+
anova(f_saem_full, best(f_saem_reduced_multi), test = TRUE)
+
## Data: 155 observations of 1 variable(s) grouped in 6 datasets
+## 
+##                            npar    AIC    BIC     Lik Chisq Df Pr(>Chisq)
+## best(f_saem_reduced_multi)    9 663.81 661.93 -322.90                    
+## f_saem_full                  10 668.27 666.19 -324.13     0  1          1
+

While AIC and BIC are lower for the reduced model, the likelihood ratio test does not indicate a significant difference between the fits.

diff --git a/vignettes/web_only/multistart.rmd b/vignettes/web_only/multistart.rmd index 8f349973..94f4ef98 100644 --- a/vignettes/web_only/multistart.rmd +++ b/vignettes/web_only/multistart.rmd @@ -1,7 +1,7 @@ --- title: Short demo of the multistart method author: Johannes Ranke -date: Last change 19 September 2022 (rebuilt `r Sys.Date()`) +date: Last change 26 September 2022 (rebuilt `r Sys.Date()`) output: html_document vignette: > @@ -10,9 +10,7 @@ vignette: > %\VignetteEncoding{UTF-8} --- -This is a vignette, because the multistart method does not seem to work in -pkgdown example code and I wanted to show the plots in the online docs. -The dimethenamid data from 2018 from seven soils is used as example data. +The dimethenamid data from 2018 from seven soils is used as example data in this vignette. ```{r} library(mkin) @@ -33,27 +31,52 @@ random effects for all degradation parameters. ```{r} f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE) f_saem_full <- saem(f_mmkin) +illparms(f_saem_full) +``` +We see that not all variability parameters are identifiable. The `illparms` +function tells us that the confidence interval for the standard deviation +of 'log_k2' includes zero. We check this assessment using multiple runs +with different starting values. + +```{r} f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) parhist(f_saem_full_multi) ``` -We see that not all variability parameters are identifiable, most problematic -is the variance of k2. So we reduce the parameter distribution model by -removing the intersoil variability for this parameter. +This confirms that the variance of k2 is the most problematic parameter, so we +reduce the parameter distribution model by removing the intersoil variability +for k2. ```{r} -f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1))) +f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2") +illparms(f_saem_reduced) f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16) parhist(f_saem_reduced_multi, lpos = "topright") ``` +The results confirm that all remaining parameters can be determined with sufficient +certainty. + We can also analyse the log-likelihoods obtained in the multiple runs: ```{r} llhist(f_saem_reduced_multi) ``` -The one run with the lower likelihood is probably responsible for the outliers -in the boxplots above, and caused by some weird starting value combination. In -any case, the converged values from the original fit (red circles) appear -perfectly acceptable, supporting the choice of starting values made by mkin. +The parameter histograms can be further improved by excluding the result with +the low likelihood. + +```{r} +parhist(f_saem_reduced_multi, lpos = "topright", llmin = -326, ylim = c(0.5, 2)) +``` + +We can use the `anova` method to compare the models, including a likelihood ratio +test if the models are nested. + +```{r} +anova(f_saem_full, best(f_saem_reduced_multi), test = TRUE) +``` + +While AIC and BIC are lower for the reduced model, the likelihood ratio test +does not indicate a significant difference between the fits. + -- cgit v1.2.1