From 08e600c0eea6153b433659c08ea49aead5ffd932 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 26 Oct 2022 10:36:19 +0200 Subject: Update multistart example code and vignette --- R/multistart.R | 12 ++---- docs/dev/articles/web_only/multistart.html | 44 +++++++++++++------ .../figure-html/unnamed-chunk-3-1.png | Bin 55167 -> 59606 bytes .../figure-html/unnamed-chunk-4-1.png | Bin 21474 -> 55167 bytes .../figure-html/unnamed-chunk-5-1.png | Bin 0 -> 21474 bytes .../figure-html/unnamed-chunk-6-1.png | Bin 0 -> 54481 bytes docs/dev/pkgdown.yml | 2 +- docs/dev/reference/Rplot001.png | Bin 1011 -> 19755 bytes docs/dev/reference/Rplot002.png | Bin 17010 -> 17365 bytes docs/dev/reference/multistart-1.png | Bin 59254 -> 59284 bytes docs/dev/reference/multistart-2.png | Bin 54149 -> 54871 bytes docs/dev/reference/multistart.html | 12 +----- man/multistart.Rd | 11 +---- vignettes/web_only/multistart.R | 36 ++++++++++++++++ vignettes/web_only/multistart.html | 36 +++++++++++----- vignettes/web_only/multistart.rmd | 47 +++++++++++++++------ 16 files changed, 137 insertions(+), 63 deletions(-) create mode 100644 docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-5-1.png create mode 100644 docs/dev/articles/web_only/multistart_files/figure-html/unnamed-chunk-6-1.png create mode 100644 vignettes/web_only/multistart.R diff --git a/R/multistart.R b/R/multistart.R index 4f5ecde3..fc2dcd7d 100644 --- a/R/multistart.R +++ b/R/multistart.R @@ -7,18 +7,12 @@ #' inspired by the article on practical identifiabiliy in the frame of nonlinear #' mixed-effects models by Duchesne et al (2021). #' -#' In case the online version of this help page contains error messages -#' in the example code and no plots, this is due to the multistart method -#' not working when called by pkgdown. Please refer to the -#' [online vignette](https://pkgdown.jrwb.de/mkin/dev/articles/web_only/multistart.html) -#' in this case. -#' #' @param object The fit object to work with #' @param n How many different combinations of starting parameters should be #' used? #' @param cores How many fits should be run in parallel (only on posix platforms)? #' @param cluster A cluster as returned by [parallel::makeCluster] to be used -#' for parallel execution. +#' for parallel execution. #' @param \dots Passed to the update function. #' @param x The multistart object to print #' @return A list of [saem.mmkin] objects, with class attributes @@ -46,7 +40,7 @@ #' f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE) #' f_saem_full <- saem(f_mmkin) #' f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) -#' parhist(f_saem_full_multi, lpos = "bottomright") +#' parhist(f_saem_full_multi, lpos = "bottomleft") #' illparms(f_saem_full) #' #' f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2") @@ -58,7 +52,7 @@ #' cl <- makePSOCKcluster(12) #' clusterExport(cl, "f_mmkin") #' f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cluster = cl) -#' parhist(f_saem_reduced_multi, lpos = "bottomright") +#' parhist(f_saem_reduced_multi, lpos = "topright") #' } multistart <- function(object, n = 50, cores = if (Sys.info()["sysname"] == "Windows") 1 else parallel::detectCores(), diff --git a/docs/dev/articles/web_only/multistart.html b/docs/dev/articles/web_only/multistart.html index 4af5def0..275fc520 100644 --- a/docs/dev/articles/web_only/multistart.html +++ b/docs/dev/articles/web_only/multistart.html @@ -109,7 +109,7 @@

Short demo of the multistart method

Johannes Ranke

-

Last change 19 September 2022 (rebuilt 2022-10-26)

+

Last change 26 September 2022 (rebuilt 2022-10-26)

Source: vignettes/web_only/multistart.rmd @@ -118,7 +118,7 @@ -

This is a vignette, because the multistart method does not seem to work in pkgdown example code and I wanted to show the plots in the online docs. The dimethenamid data from 2018 from seven soils is used as example data.

+

The dimethenamid data from 2018 from seven soils is used as example data in this vignette.

 library(mkin)
 dmta_ds <- lapply(1:7, function(i) {
@@ -134,20 +134,40 @@
 
 f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE)
 f_saem_full <- saem(f_mmkin)
-f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16)
+illparms(f_saem_full)
+
## [1] "sd(log_k2)"
+

We see that not all variability parameters are identifiable. The illparms function tells us that the confidence interval for the standard deviation of ‘log_k2’ includes zero. We check this assessment using multiple runs with different starting values.

+
+f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16)
 parhist(f_saem_full_multi)
-

-

We see that not all variability parameters are identifiable, most problematic is the variance of k2. So we reduce the parameter distribution model by removing the intersoil variability for this parameter.

-
-f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1)))
-f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16)
-parhist(f_saem_reduced_multi, lpos = "topright")

+

This confirms that the variance of k2 is the most problematic parameter, so we reduce the parameter distribution model by removing the intersoil variability for k2.

+
+f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2")
+illparms(f_saem_reduced)
+
## character(0)
+
+f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16)
+parhist(f_saem_reduced_multi, lpos = "topright")
+

+

The results confirm that all remaining parameters can be determined with sufficient certainty.

We can also analyse the log-likelihoods obtained in the multiple runs:

-
+
 llhist(f_saem_reduced_multi)
-

-

The one run with the lower likelihood is probably responsible for the outliers in the boxplots above, and caused by some weird starting value combination. In any case, the converged values from the original fit (red circles) appear perfectly acceptable, supporting the choice of starting values made by mkin.

+

+

The parameter histograms can be further improved by excluding the result with the low likelihood.

+
+parhist(f_saem_reduced_multi, lpos = "topright", llmin = -326, ylim = c(0.5, 2))
+

+

We can use the anova method to compare the models, including a likelihood ratio test if the models are nested.

+
+anova(f_saem_full, best(f_saem_reduced_multi), test = TRUE)
+
## Data: 155 observations of 1 variable(s) grouped in 6 datasets
+## 
+##                            npar    AIC    BIC     Lik Chisq Df Pr(>Chisq)
+## best(f_saem_reduced_multi)    9 663.81 661.93 -322.90                    
+## f_saem_full                  10 668.27 666.19 -324.13     0  1          1
+

While AIC and BIC are lower for the reduced model, the likelihood ratio test does not indicate a significant difference between the fits.

-
-

Details

-

In case the online version of this help page contains error messages -in the example code and no plots, this is due to the multistart method -not working when called by pkgdown. Please refer to the -online vignette -in this case.

References

@@ -203,7 +195,7 @@ doi: 10.1186/s12859-021-04373-4.

f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE) f_saem_full <- saem(f_mmkin) f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) -parhist(f_saem_full_multi, lpos = "bottomright") +parhist(f_saem_full_multi, lpos = "bottomleft") illparms(f_saem_full) #> [1] "sd(log_k2)" @@ -219,7 +211,7 @@ doi: 10.1186/s12859-021-04373-4.

clusterExport(cl, "f_mmkin") #> Error in get(name, envir = envir): object 'f_mmkin' not found f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cluster = cl) -parhist(f_saem_reduced_multi, lpos = "bottomright") +parhist(f_saem_reduced_multi, lpos = "topright") # }
diff --git a/man/multistart.Rd b/man/multistart.Rd index d20c0465..ebcc7d80 100644 --- a/man/multistart.Rd +++ b/man/multistart.Rd @@ -61,13 +61,6 @@ started with a certain range of reasonable starting parameters. It is inspired by the article on practical identifiabiliy in the frame of nonlinear mixed-effects models by Duchesne et al (2021). } -\details{ -In case the online version of this help page contains error messages -in the example code and no plots, this is due to the multistart method -not working when called by pkgdown. Please refer to the -\href{https://pkgdown.jrwb.de/mkin/dev/articles/web_only/multistart.html}{online vignette} -in this case. -} \examples{ \dontrun{ library(mkin) @@ -84,7 +77,7 @@ dmta_ds[["Elliot 1"]] <- dmta_ds[["Elliot 2"]] <- NULL f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE) f_saem_full <- saem(f_mmkin) f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) -parhist(f_saem_full_multi, lpos = "bottomright") +parhist(f_saem_full_multi, lpos = "bottomleft") illparms(f_saem_full) f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2") @@ -96,7 +89,7 @@ library(parallel) cl <- makePSOCKcluster(12) clusterExport(cl, "f_mmkin") f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cluster = cl) -parhist(f_saem_reduced_multi, lpos = "bottomright") +parhist(f_saem_reduced_multi, lpos = "topright") } } \references{ diff --git a/vignettes/web_only/multistart.R b/vignettes/web_only/multistart.R new file mode 100644 index 00000000..36612443 --- /dev/null +++ b/vignettes/web_only/multistart.R @@ -0,0 +1,36 @@ +## ----------------------------------------------------------------------------- +library(mkin) +dmta_ds <- lapply(1:7, function(i) { + ds_i <- dimethenamid_2018$ds[[i]]$data + ds_i[ds_i$name == "DMTAP", "name"] <- "DMTA" + ds_i$time <- ds_i$time * dimethenamid_2018$f_time_norm[i] + ds_i +}) +names(dmta_ds) <- sapply(dimethenamid_2018$ds, function(ds) ds$title) +dmta_ds[["Elliot"]] <- rbind(dmta_ds[["Elliot 1"]], dmta_ds[["Elliot 2"]]) +dmta_ds[["Elliot 1"]] <- dmta_ds[["Elliot 2"]] <- NULL + +## ----------------------------------------------------------------------------- +f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE) +f_saem_full <- saem(f_mmkin) +illparms(f_saem_full) + +## ----------------------------------------------------------------------------- +f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) +parhist(f_saem_full_multi) + +## ----------------------------------------------------------------------------- +f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2") +illparms(f_saem_reduced) +f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16) +parhist(f_saem_reduced_multi, lpos = "topright") + +## ----------------------------------------------------------------------------- +llhist(f_saem_reduced_multi) + +## ----------------------------------------------------------------------------- +parhist(f_saem_reduced_multi, lpos = "topright", llmin = -326, ylim = c(0.5, 2)) + +## ----------------------------------------------------------------------------- +anova(f_saem_full, best(f_saem_reduced_multi), test = TRUE) + diff --git a/vignettes/web_only/multistart.html b/vignettes/web_only/multistart.html index 00631aec..5568ad2c 100644 --- a/vignettes/web_only/multistart.html +++ b/vignettes/web_only/multistart.html @@ -364,12 +364,12 @@ pre code {

Short demo of the multistart method

Johannes Ranke

-

Last change 19 September 2022 (rebuilt 2022-09-28)

+

Last change 26 September 2022 (rebuilt 2022-10-26)

-

This is a vignette, because the multistart method does not seem to work in pkgdown example code and I wanted to show the plots in the online docs. The dimethenamid data from 2018 from seven soils is used as example data.

+

The dimethenamid data from 2018 from seven soils is used as example data in this vignette.

library(mkin)
 dmta_ds <- lapply(1:7, function(i) {
   ds_i <- dimethenamid_2018$ds[[i]]$data
@@ -383,18 +383,34 @@ dmta_ds[["Elliot 1"]] <- dmta_ds[["Elliot 2"]] <- NULL
 

First, we check the DFOP model with the two-component error model and random effects for all degradation parameters.

f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE)
 f_saem_full <- saem(f_mmkin)
-f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16)
+illparms(f_saem_full)
+
## [1] "sd(log_k2)"
+

We see that not all variability parameters are identifiable. The illparms function tells us that the confidence interval for the standard deviation of ‘log_k2’ includes zero. We check this assessment using multiple runs with different starting values.

+
f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16)
 parhist(f_saem_full_multi)
-

-

We see that not all variability parameters are identifiable, most problematic is the variance of k2. So we reduce the parameter distribution model by removing the intersoil variability for this parameter.

-
f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1)))
-f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16)
+

+

This confirms that the variance of k2 is the most problematic parameter, so we reduce the parameter distribution model by removing the intersoil variability for k2.

+
f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2")
+illparms(f_saem_reduced)
+
## character(0)
+
f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16)
 parhist(f_saem_reduced_multi, lpos = "topright")
-

+

+

The results confirm that all remaining parameters can be determined with sufficient certainty.

We can also analyse the log-likelihoods obtained in the multiple runs:

llhist(f_saem_reduced_multi)
-

-

The one run with the lower likelihood is probably responsible for the outliers in the boxplots above, and caused by some weird starting value combination. In any case, the converged values from the original fit (red circles) appear perfectly acceptable, supporting the choice of starting values made by mkin.

+

+

The parameter histograms can be further improved by excluding the result with the low likelihood.

+
parhist(f_saem_reduced_multi, lpos = "topright", llmin = -326, ylim = c(0.5, 2))
+

+

We can use the anova method to compare the models, including a likelihood ratio test if the models are nested.

+
anova(f_saem_full, best(f_saem_reduced_multi), test = TRUE)
+
## Data: 155 observations of 1 variable(s) grouped in 6 datasets
+## 
+##                            npar    AIC    BIC     Lik Chisq Df Pr(>Chisq)
+## best(f_saem_reduced_multi)    9 663.81 661.93 -322.90                    
+## f_saem_full                  10 668.27 666.19 -324.13     0  1          1
+

While AIC and BIC are lower for the reduced model, the likelihood ratio test does not indicate a significant difference between the fits.

diff --git a/vignettes/web_only/multistart.rmd b/vignettes/web_only/multistart.rmd index 8f349973..94f4ef98 100644 --- a/vignettes/web_only/multistart.rmd +++ b/vignettes/web_only/multistart.rmd @@ -1,7 +1,7 @@ --- title: Short demo of the multistart method author: Johannes Ranke -date: Last change 19 September 2022 (rebuilt `r Sys.Date()`) +date: Last change 26 September 2022 (rebuilt `r Sys.Date()`) output: html_document vignette: > @@ -10,9 +10,7 @@ vignette: > %\VignetteEncoding{UTF-8} --- -This is a vignette, because the multistart method does not seem to work in -pkgdown example code and I wanted to show the plots in the online docs. -The dimethenamid data from 2018 from seven soils is used as example data. +The dimethenamid data from 2018 from seven soils is used as example data in this vignette. ```{r} library(mkin) @@ -33,27 +31,52 @@ random effects for all degradation parameters. ```{r} f_mmkin <- mmkin("DFOP", dmta_ds, error_model = "tc", cores = 7, quiet = TRUE) f_saem_full <- saem(f_mmkin) +illparms(f_saem_full) +``` +We see that not all variability parameters are identifiable. The `illparms` +function tells us that the confidence interval for the standard deviation +of 'log_k2' includes zero. We check this assessment using multiple runs +with different starting values. + +```{r} f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) parhist(f_saem_full_multi) ``` -We see that not all variability parameters are identifiable, most problematic -is the variance of k2. So we reduce the parameter distribution model by -removing the intersoil variability for this parameter. +This confirms that the variance of k2 is the most problematic parameter, so we +reduce the parameter distribution model by removing the intersoil variability +for k2. ```{r} -f_saem_reduced <- update(f_saem_full, covariance.model = diag(c(1, 1, 0, 1))) +f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2") +illparms(f_saem_reduced) f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16) parhist(f_saem_reduced_multi, lpos = "topright") ``` +The results confirm that all remaining parameters can be determined with sufficient +certainty. + We can also analyse the log-likelihoods obtained in the multiple runs: ```{r} llhist(f_saem_reduced_multi) ``` -The one run with the lower likelihood is probably responsible for the outliers -in the boxplots above, and caused by some weird starting value combination. In -any case, the converged values from the original fit (red circles) appear -perfectly acceptable, supporting the choice of starting values made by mkin. +The parameter histograms can be further improved by excluding the result with +the low likelihood. + +```{r} +parhist(f_saem_reduced_multi, lpos = "topright", llmin = -326, ylim = c(0.5, 2)) +``` + +We can use the `anova` method to compare the models, including a likelihood ratio +test if the models are nested. + +```{r} +anova(f_saem_full, best(f_saem_reduced_multi), test = TRUE) +``` + +While AIC and BIC are lower for the reduced model, the likelihood ratio test +does not indicate a significant difference between the fits. + -- cgit v1.2.1