From 08e600c0eea6153b433659c08ea49aead5ffd932 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 26 Oct 2022 10:36:19 +0200 Subject: Update multistart example code and vignette --- docs/dev/articles/web_only/multistart.html | 44 ++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 12 deletions(-) (limited to 'docs/dev/articles/web_only/multistart.html') diff --git a/docs/dev/articles/web_only/multistart.html b/docs/dev/articles/web_only/multistart.html index 4af5def0..275fc520 100644 --- a/docs/dev/articles/web_only/multistart.html +++ b/docs/dev/articles/web_only/multistart.html @@ -109,7 +109,7 @@

Short demo of the multistart method

Johannes Ranke

-

Last change 19 September 2022 (rebuilt 2022-10-26)

+

Last change 26 September 2022 (rebuilt 2022-10-26)

Source: vignettes/web_only/multistart.rmd @@ -118,7 +118,7 @@ -

This is a vignette, because the multistart method does not seem to work in pkgdown example code and I wanted to show the plots in the online docs. The dimethenamid data from 2018 from seven soils is used as example data.

+

The dimethenamid data from 2018 from seven soils is used as example data in this vignette.

 library(mkin)
 dmta_ds <- lapply(1:7, function(i) {
@@ -134,20 +134,40 @@
 
 f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE)
 f_saem_full <- saem(f_mmkin)
-f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16)
+illparms(f_saem_full)
+
## [1] "sd(log_k2)"
+

We see that not all variability parameters are identifiable. The illparms function tells us that the confidence interval for the standard deviation of ‘log_k2’ includes zero. We check this assessment using multiple runs with different starting values.

+
+f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16)
 parhist(f_saem_full_multi)
-

-

We see that not all variability parameters are identifiable, most problematic is the variance of k2. So we reduce the parameter distribution model by removing the intersoil variability for this parameter.

-
-f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1)))
-f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16)
-parhist(f_saem_reduced_multi, lpos = "topright")

+

This confirms that the variance of k2 is the most problematic parameter, so we reduce the parameter distribution model by removing the intersoil variability for k2.

+
+f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2")
+illparms(f_saem_reduced)
+
## character(0)
+
+f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16)
+parhist(f_saem_reduced_multi, lpos = "topright")
+

+

The results confirm that all remaining parameters can be determined with sufficient certainty.

We can also analyse the log-likelihoods obtained in the multiple runs:

-
+
 llhist(f_saem_reduced_multi)
-

-

The one run with the lower likelihood is probably responsible for the outliers in the boxplots above, and caused by some weird starting value combination. In any case, the converged values from the original fit (red circles) appear perfectly acceptable, supporting the choice of starting values made by mkin.

+

+

The parameter histograms can be further improved by excluding the result with the low likelihood.

+
+parhist(f_saem_reduced_multi, lpos = "topright", llmin = -326, ylim = c(0.5, 2))
+

+

We can use the anova method to compare the models, including a likelihood ratio test if the models are nested.

+
+anova(f_saem_full, best(f_saem_reduced_multi), test = TRUE)
+
## Data: 155 observations of 1 variable(s) grouped in 6 datasets
+## 
+##                            npar    AIC    BIC     Lik Chisq Df Pr(>Chisq)
+## best(f_saem_reduced_multi)    9 663.81 661.93 -322.90                    
+## f_saem_full                  10 668.27 666.19 -324.13     0  1          1
+

While AIC and BIC are lower for the reduced model, the likelihood ratio test does not indicate a significant difference between the fits.