From 004fce2520d6889d82226e21bc443426e81d93f2 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Mon, 17 Apr 2023 19:39:09 +0200 Subject: Improve docs of multistart method --- R/multistart.R | 11 ++-- docs/dev/articles/index.html | 5 -- docs/dev/articles/web_only/multistart.html | 60 ++++++++------------- .../figure-html/unnamed-chunk-3-1.png | Bin 65871 -> 64221 bytes .../figure-html/unnamed-chunk-4-1.png | Bin 59520 -> 52914 bytes .../figure-html/unnamed-chunk-5-1.png | Bin 22355 -> 22142 bytes docs/dev/authors.html | 5 -- docs/dev/index.html | 27 +++------- docs/dev/news/index.html | 56 ++++++------------- docs/dev/pkgdown.yml | 4 +- docs/dev/reference/Rplot001.png | Bin 13993 -> 20512 bytes docs/dev/reference/Rplot002.png | Bin 13470 -> 16766 bytes docs/dev/reference/index.html | 5 -- docs/dev/reference/mkinpredict.html | 9 ++-- docs/dev/reference/multistart-1.png | Bin 66388 -> 63843 bytes docs/dev/reference/multistart-2.png | Bin 56780 -> 52220 bytes docs/dev/reference/multistart.html | 10 ++-- docs/dev/sitemap.xml | 3 -- .../skeleton/.skeleton.Rmd.swp | Bin 0 -> 20480 bytes man/multistart.Rd | 7 ++- vignettes/web_only/multistart.html | 52 ++++++++---------- vignettes/web_only/multistart.rmd | 31 +++++------ 22 files changed, 99 insertions(+), 186 deletions(-) create mode 100644 inst/rmarkdown/templates/hierarchical_kinetics/skeleton/.skeleton.Rmd.swp diff --git a/R/multistart.R b/R/multistart.R index bdfbfe63..aeea2d81 100644 --- a/R/multistart.R +++ b/R/multistart.R @@ -45,13 +45,12 @@ #' #' f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2") #' illparms(f_saem_reduced) -#' # On Windows, we need to create a cluster first. When working with -#' # such a cluster, we need to export the mmkin object to the cluster -#' # nodes, as it is referred to when updating the saem object on the nodes. 
+#' # On Windows, we need to create a PSOCK cluster first and refer to it +#' # in the call to multistart() #' library(parallel) #' cl <- makePSOCKcluster(12) #' f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cluster = cl) -#' parplot(f_saem_reduced_multi, lpos = "topright") +#' parplot(f_saem_reduced_multi, lpos = "topright", ylim = c(0.5, 2)) #' stopCluster(cl) #' } multistart <- function(object, n = 50, @@ -103,9 +102,7 @@ multistart.saem.mmkin <- function(object, n = 50, cores = 1, res <- parallel::mclapply(1:n, fit_function, mc.cores = cores, mc.preschedule = FALSE) } else { - res <- parallel::parLapplyLB(cluster, 1:n, fit_function, - mc.preschedule = FALSE - ) + res <- parallel::parLapplyLB(cluster, 1:n, fit_function) } attr(res, "orig") <- object attr(res, "start_parms") <- start_parms diff --git a/docs/dev/articles/index.html b/docs/dev/articles/index.html index a82eb999..2aacc53a 100644 --- a/docs/dev/articles/index.html +++ b/docs/dev/articles/index.html @@ -81,11 +81,6 @@
  • Example evaluation of NAFTA SOP Attachment examples
  • -
  • -
  • -
  • - -
  • News diff --git a/docs/dev/articles/web_only/multistart.html b/docs/dev/articles/web_only/multistart.html index d3d9d76d..b5635df2 100644 --- a/docs/dev/articles/web_only/multistart.html +++ b/docs/dev/articles/web_only/multistart.html @@ -127,15 +127,13 @@ -
    +

    -

    This confirms that the variance of k2 is the most problematic -parameter, so we reduce the parameter distribution model by removing the -intersoil variability for k2.

    +

    This confirms that the variance of k2 is the most problematic parameter, so we reduce the parameter distribution model by removing the intersoil variability for k2.

     f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2")
     illparms(f_saem_reduced)
    -f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16)
    -parplot(f_saem_reduced_multi, lpos = "topright")
    +f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 8) +parplot(f_saem_reduced_multi, lpos = "topright", ylim = c(0.5, 2))

    -

    The results confirm that all remaining parameters can be determined -with sufficient certainty.

    -

    We can also analyse the log-likelihoods obtained in the multiple -runs:

    +

    The results confirm that all remaining parameters can be determined with sufficient certainty.

    +

    We can also analyse the log-likelihoods obtained in the multiple runs:

     llhist(f_saem_reduced_multi)

    -

    The parameter histograms can be further improved by excluding the -result with the low likelihood.

    +

    We can use the anova method to compare the models.

    -parplot(f_saem_reduced_multi, lpos = "topright", llmin = -326, ylim = c(0.5, 2))
    -

    -

    We can use the anova method to compare the models, -including a likelihood ratio test if the models are nested.

    -
    -anova(f_saem_full, best(f_saem_reduced_multi), test = TRUE)
    +anova(f_saem_full, best(f_saem_full_multi), + f_saem_reduced, best(f_saem_reduced_multi), test = TRUE)
    ## Data: 155 observations of 1 variable(s) grouped in 6 datasets
     ## 
    -##                            npar    AIC    BIC     Lik Chisq Df Pr(>Chisq)
    -## best(f_saem_reduced_multi)    9 663.69 661.82 -322.85                    
    -## f_saem_full                  10 669.77 667.69 -324.89     0  1          1
    -

    While AIC and BIC are lower for the reduced model, the likelihood -ratio test does not indicate a significant difference between the -fits.

    +## npar AIC BIC Lik Chisq Df Pr(>Chisq) +## f_saem_reduced 9 663.74 661.87 -322.87 +## best(f_saem_reduced_multi) 9 663.60 661.72 -322.80 0.1476 0 +## f_saem_full 10 670.35 668.26 -325.17 0.0000 1 1 +## best(f_saem_full_multi) 10 665.61 663.53 -322.80 4.7372 0 +

    The reduced model results in lower AIC and BIC values, so it is clearly preferable. Using multiple starting values gives a large improvement in case of the full model, because it is less well-defined, which impedes convergence. For the reduced model, using multiple starting values only results in a small improvement of the model fit.

  • Example evaluation of NAFTA SOP Attachment examples
  • -
  • -
  • -
  • - -
  • News diff --git a/docs/dev/index.html b/docs/dev/index.html index 317e3380..f892841f 100644 --- a/docs/dev/index.html +++ b/docs/dev/index.html @@ -115,12 +115,6 @@
  • Example evaluation of NAFTA SOP Attachment examples
  • -
  • -
  • - -
  • - -
  • @@ -251,21 +245,12 @@

    References

    - - - - - - - - - + + +
    -Ranke J, Wöltjen J, Schmidt J, and Comets E (2021) Taking kinetic evaluations of degradation data to the next level with nonlinear mixed-effects models. Environments 8 (8) 71 doi:10.3390/environments8080071 -
    -Ranke J, Meinecke S (2019) Error Models for the Kinetic Evaluation of Chemical Degradation Data Environments 6 (12) 124 doi:10.3390/environments6120124 -
    -Ranke J, Wöltjen J, Meinecke S (2018) Comparison of software tools for kinetic evaluation of chemical degradation data Environmental Sciences Europe 30 17 doi:10.1186/s12302-018-0145-1 -
    Ranke J, Wöltjen J, Schmidt J, and Comets E (2021) Taking kinetic evaluations of degradation data to the next level with nonlinear mixed-effects models. Environments 8 (8) 71 doi:10.3390/environments8080071 +
    Ranke J, Meinecke S (2019) Error Models for the Kinetic Evaluation of Chemical Degradation Data Environments 6 (12) 124 doi:10.3390/environments6120124 +
    Ranke J, Wöltjen J, Meinecke S (2018) Comparison of software tools for kinetic evaluation of chemical degradation data Environmental Sciences Europe 30 17 doi:10.1186/s12302-018-0145-1 +
    diff --git a/docs/dev/news/index.html b/docs/dev/news/index.html index 2c169609..70eeeed6 100644 --- a/docs/dev/news/index.html +++ b/docs/dev/news/index.html @@ -81,11 +81,6 @@
  • Example evaluation of NAFTA SOP Attachment examples
  • -
  • -
  • -
  • - -
  • News @@ -176,8 +171,7 @@
    -
    • ‘dimethenamid_2018’: Correct the data for the Borstel soil. The five observations from Staudenmaier (2013) that were previously stored as “Borstel 2” are actually just a subset of the 16 observations in “Borstel 1” which is now simply “Borstel”
    • -
    +
    • All plotting functions setting graphical parameters: Use on.exit() for resetting graphical parameters

    • @@ -186,12 +180,10 @@
    -
    • Review and update README, the ‘Introduction to mkin’ vignette and some of the help pages
    • -
    +
    -
    • ‘mkinfit’: Keep model names stored in ‘mkinmod’ objects, avoiding their loss in ‘gmkin’
    • -
    +
    • ‘confint.mmkin’, ‘nlme.mmkin’, ‘transform_odeparms’: Fix example code in dontrun sections that failed with current defaults

    • @@ -246,8 +238,7 @@
    -
    • Increase a test tolerance to make it pass on all CRAN check machines
    • -
    +
    • ‘nlme.mmkin’: An nlme method for mmkin row objects and an associated S3 class with print, plot, anova and endpoint methods

    • @@ -362,8 +353,7 @@
    -
    • Remove test_FOMC_ill-defined.R as it is too platform dependent
    • -
    +
    • Rename twa to max_twa_parent to avoid conflict with twa from my pfm package

    • @@ -375,8 +365,7 @@

      New features

      -
      • A twa function, calculating maximum time weighted average concentrations for the parent (SFO, FOMC and DFOP).
      • -
      +
      • A twa function, calculating maximum time weighted average concentrations for the parent (SFO, FOMC and DFOP).
    @@ -391,8 +380,7 @@

    Bug fixes

    -
    • The test test_FOMC_ill-defined failed on several architectures, so the test is now skipped
    • -
    +
    • The test test_FOMC_ill-defined failed on several architectures, so the test is now skipped
    @@ -426,8 +414,7 @@

    Major changes

    -
    • Add the argument from_max_mean to mkinfit, for fitting only the decline from the maximum observed value for models with a single observed variable
    • -
    +
    • Add the argument from_max_mean to mkinfit, for fitting only the decline from the maximum observed value for models with a single observed variable

    Minor changes

    • Add plots to compiled_models vignette

    • @@ -447,21 +434,18 @@

      Bug fixes

      • -print.summary.mkinfit(): Avoid an error that occurred when printing summaries generated with mkin versions before 0.9-36
      • -
      +print.summary.mkinfit(): Avoid an error that occurred when printing summaries generated with mkin versions before 0.9-36

    Bug fixes

    • -endpoints(): For DFOP and SFORB models, where optimize() is used, make use of the fact that the DT50 must be between DT50_k1 and DT50_k2 (DFOP) or DT50_b1 and DT50_b2 (SFORB), as optimize() sometimes did not find the minimum. Likewise for finding DT90 values. Also fit on the log scale to make the function more efficient.
    • -
    +endpoints(): For DFOP and SFORB models, where optimize() is used, make use of the fact that the DT50 must be between DT50_k1 and DT50_k2 (DFOP) or DT50_b1 and DT50_b2 (SFORB), as optimize() sometimes did not find the minimum. Likewise for finding DT90 values. Also fit on the log scale to make the function more efficient.
  • Internal changes

    +DESCRIPTION, NAMESPACE, R/*.R: Import (from) stats, graphics and methods packages, and qualify some function calls for non-base packages installed with R to avoid NOTES made by R CMD check --as-cran with upcoming R versions.
    @@ -473,8 +457,7 @@

    Bug fixes

    • -mkinparplot(): Fix the x axis scaling for rate constants and formation fractions that got confused by the introduction of the t-values of transformed parameters.
    • -
    +mkinparplot(): Fix the x axis scaling for rate constants and formation fractions that got confused by the introduction of the t-values of transformed parameters.
    @@ -486,8 +469,7 @@

    Bug fixes

    • -mkinmod(): When generating the C code for the derivatives, only declare the time variable when it is needed and remove the ‘-W-no-unused-variable’ compiler flag as the C compiler used in the CRAN checks on Solaris does not know it.
    • -
    +mkinmod(): When generating the C code for the derivatives, only declare the time variable when it is needed and remove the ‘-W-no-unused-variable’ compiler flag as the C compiler used in the CRAN checks on Solaris does not know it.
    @@ -500,15 +482,13 @@

    Minor changes

    -
    +

    Major changes

    -
    • Switch from RUnit to testthat for testing
    • -
    +

    Bug fixes

    New features

    -
    • It is now possible to use formation fractions in combination with turning off the sink in mkinmod().
    • -
    +

    Major changes

  • News diff --git a/docs/dev/reference/mkinpredict.html b/docs/dev/reference/mkinpredict.html index 01c15a19..7d8e7c26 100644 --- a/docs/dev/reference/mkinpredict.html +++ b/docs/dev/reference/mkinpredict.html @@ -394,11 +394,12 @@ as these always return mapped output.

    c(parent = 100, m1 = 0), seq(0, 20, by = 0.1), solution_type = "analytical", use_compiled = FALSE)[201,]) } +#> Loading required package: rbenchmark #> test relative elapsed -#> 2 deSolve_compiled 1.0 0.002 -#> 4 analytical 1.0 0.002 -#> 1 eigen 4.0 0.008 -#> 3 deSolve 30.5 0.061 +#> 2 deSolve_compiled 1.00 0.004 +#> 1 eigen 4.00 0.016 +#> 4 analytical 4.25 0.017 +#> 3 deSolve 40.75 0.163 # \dontrun{ # Predict from a fitted model diff --git a/docs/dev/reference/multistart-1.png b/docs/dev/reference/multistart-1.png index c7937d67..ee0306d6 100644 Binary files a/docs/dev/reference/multistart-1.png and b/docs/dev/reference/multistart-1.png differ diff --git a/docs/dev/reference/multistart-2.png b/docs/dev/reference/multistart-2.png index e1983f12..69a178e3 100644 Binary files a/docs/dev/reference/multistart-2.png and b/docs/dev/reference/multistart-2.png differ diff --git a/docs/dev/reference/multistart.html b/docs/dev/reference/multistart.html index 3cdede7b..36767560 100644 --- a/docs/dev/reference/multistart.html +++ b/docs/dev/reference/multistart.html @@ -222,15 +222,13 @@ doi: 10.1186/s12859-021-04373-4.

    f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2") illparms(f_saem_reduced) -# On Windows, we need to create a cluster first. When working with -# such a cluster, we need to export the mmkin object to the cluster -# nodes, as it is referred to when updating the saem object on the nodes. +# On Windows, we need to create a PSOCK cluster first and refer to it +# in the call to multistart() library(parallel) cl <- makePSOCKcluster(12) f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cluster = cl) -#> Error in checkForRemoteErrors(val): 16 nodes produced errors; first error: unused argument (mc.preschedule = FALSE) -parplot(f_saem_reduced_multi, lpos = "topright") -#> Error in parplot(f_saem_reduced_multi, lpos = "topright"): object 'f_saem_reduced_multi' not found +parplot(f_saem_reduced_multi, lpos = "topright", ylim = c(0.5, 2)) + stopCluster(cl) # }
  • diff --git a/docs/dev/sitemap.xml b/docs/dev/sitemap.xml index b70dc782..b3542d0b 100644 --- a/docs/dev/sitemap.xml +++ b/docs/dev/sitemap.xml @@ -54,9 +54,6 @@ https://pkgdown.jrwb.de/mkin/authors.html - - https://pkgdown.jrwb.de/mkin/coverage/coverage.html - https://pkgdown.jrwb.de/mkin/index.html diff --git a/inst/rmarkdown/templates/hierarchical_kinetics/skeleton/.skeleton.Rmd.swp b/inst/rmarkdown/templates/hierarchical_kinetics/skeleton/.skeleton.Rmd.swp new file mode 100644 index 00000000..2c5bfed8 Binary files /dev/null and b/inst/rmarkdown/templates/hierarchical_kinetics/skeleton/.skeleton.Rmd.swp differ diff --git a/man/multistart.Rd b/man/multistart.Rd index 5a5f7b44..0df29bfa 100644 --- a/man/multistart.Rd +++ b/man/multistart.Rd @@ -82,13 +82,12 @@ illparms(f_saem_full) f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2") illparms(f_saem_reduced) -# On Windows, we need to create a cluster first. When working with -# such a cluster, we need to export the mmkin object to the cluster -# nodes, as it is referred to when updating the saem object on the nodes. 
+# On Windows, we need to create a PSOCK cluster first and refer to it +# in the call to multistart() library(parallel) cl <- makePSOCKcluster(12) f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cluster = cl) -parplot(f_saem_reduced_multi, lpos = "topright") +parplot(f_saem_reduced_multi, lpos = "topright", ylim = c(0.5, 2)) stopCluster(cl) } } diff --git a/vignettes/web_only/multistart.html b/vignettes/web_only/multistart.html index 5568ad2c..93f08ca3 100644 --- a/vignettes/web_only/multistart.html +++ b/vignettes/web_only/multistart.html @@ -299,8 +299,8 @@ pre code { border-radius: 4px; } -.tabset-dropdown > .nav-tabs > li.active:before { - content: ""; +.tabset-dropdown > .nav-tabs > li.active:before, .tabset-dropdown > .nav-tabs.nav-tabs-open:before { + content: "\e259"; font-family: 'Glyphicons Halflings'; display: inline-block; padding: 10px; @@ -308,16 +308,9 @@ pre code { } .tabset-dropdown > .nav-tabs.nav-tabs-open > li.active:before { - content: ""; - border: none; -} - -.tabset-dropdown > .nav-tabs.nav-tabs-open:before { - content: ""; + content: "\e258"; font-family: 'Glyphicons Halflings'; - display: inline-block; - padding: 10px; - border-right: 1px solid #ddd; + border: none; } .tabset-dropdown > .nav-tabs > li.active { @@ -364,7 +357,7 @@ pre code {

    Short demo of the multistart method

    Johannes Ranke

    -

    Last change 26 September 2022 (rebuilt 2022-10-26)

    +

    Last change 17 April 2023 (rebuilt 2023-04-17)

    @@ -386,31 +379,30 @@ f_saem_full <- saem(f_mmkin) illparms(f_saem_full)
    ## [1] "sd(log_k2)"

    We see that not all variability parameters are identifiable. The illparms function tells us that the confidence interval for the standard deviation of ‘log_k2’ includes zero. We check this assessment using multiple runs with different starting values.

    -
    f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16)
    -parhist(f_saem_full_multi)
    -

    +
    f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 8)
    +parplot(f_saem_full_multi, lpos = "topleft")
    +

    This confirms that the variance of k2 is the most problematic parameter, so we reduce the parameter distribution model by removing the intersoil variability for k2.

    f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2")
    -illparms(f_saem_reduced)
    -
    ## character(0)
    -
    f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16)
    -parhist(f_saem_reduced_multi, lpos = "topright")
    -

    +illparms(f_saem_reduced) +f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 8) +parplot(f_saem_reduced_multi, lpos = "topright", ylim = c(0.5, 2)) +

    The results confirm that all remaining parameters can be determined with sufficient certainty.

    We can also analyse the log-likelihoods obtained in the multiple runs:

    llhist(f_saem_reduced_multi)
    -

    -

    The parameter histograms can be further improved by excluding the result with the low likelihood.

    -
    parhist(f_saem_reduced_multi, lpos = "topright", llmin = -326, ylim = c(0.5, 2))
    -

    -

    We can use the anova method to compare the models, including a likelihood ratio test if the models are nested.

    -
    anova(f_saem_full, best(f_saem_reduced_multi), test = TRUE)
    +

    +

    We can use the anova method to compare the models.

    +
    anova(f_saem_full, best(f_saem_full_multi),
    +  f_saem_reduced, best(f_saem_reduced_multi), test = TRUE)
    ## Data: 155 observations of 1 variable(s) grouped in 6 datasets
     ## 
    -##                            npar    AIC    BIC     Lik Chisq Df Pr(>Chisq)
    -## best(f_saem_reduced_multi)    9 663.81 661.93 -322.90                    
    -## f_saem_full                  10 668.27 666.19 -324.13     0  1          1
    -

    While AIC and BIC are lower for the reduced model, the likelihood ratio test does not indicate a significant difference between the fits.

    +## npar AIC BIC Lik Chisq Df Pr(>Chisq) +## f_saem_reduced 9 663.74 661.87 -322.87 +## best(f_saem_reduced_multi) 9 663.60 661.72 -322.80 0.1476 0 +## f_saem_full 10 670.35 668.26 -325.17 0.0000 1 1 +## best(f_saem_full_multi) 10 665.61 663.53 -322.80 4.7372 0 +

    The reduced model results in lower AIC and BIC values, so it is clearly preferable. Using multiple starting values gives a large improvement in case of the full model, because it is less well-defined, which impedes convergence. For the reduced model, using multiple starting values only results in a small improvement of the model fit.

    diff --git a/vignettes/web_only/multistart.rmd b/vignettes/web_only/multistart.rmd index 27a8a96a..ccf26b3d 100644 --- a/vignettes/web_only/multistart.rmd +++ b/vignettes/web_only/multistart.rmd @@ -1,7 +1,7 @@ --- title: Short demo of the multistart method author: Johannes Ranke -date: Last change 26 September 2022 (rebuilt `r Sys.Date()`) +date: Last change 17 April 2023 (rebuilt `r Sys.Date()`) output: html_document vignette: > @@ -39,8 +39,8 @@ of 'log_k2' includes zero. We check this assessment using multiple runs with different starting values. ```{r} -f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 16) -parplot(f_saem_full_multi) +f_saem_full_multi <- multistart(f_saem_full, n = 16, cores = 8) +parplot(f_saem_full_multi, lpos = "topleft") ``` This confirms that the variance of k2 is the most problematic parameter, so we @@ -50,8 +50,8 @@ for k2. ```{r} f_saem_reduced <- update(f_saem_full, no_random_effect = "log_k2") illparms(f_saem_reduced) -f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 16) -parplot(f_saem_reduced_multi, lpos = "topright") +f_saem_reduced_multi <- multistart(f_saem_reduced, n = 16, cores = 8) +parplot(f_saem_reduced_multi, lpos = "topright", ylim = c(0.5, 2)) ``` The results confirm that all remaining parameters can be determined with sufficient @@ -63,20 +63,17 @@ We can also analyse the log-likelihoods obtained in the multiple runs: llhist(f_saem_reduced_multi) ``` -The parameter histograms can be further improved by excluding the result with -the low likelihood. +We can use the `anova` method to compare the models. ```{r} -parplot(f_saem_reduced_multi, lpos = "topright", llmin = -326, ylim = c(0.5, 2)) +anova(f_saem_full, best(f_saem_full_multi), + f_saem_reduced, best(f_saem_reduced_multi), test = TRUE) ``` -We can use the `anova` method to compare the models, including a likelihood ratio -test if the models are nested. 
- -```{r} -anova(f_saem_full, best(f_saem_reduced_multi), test = TRUE) -``` - -While AIC and BIC are lower for the reduced model, the likelihood ratio test -does not indicate a significant difference between the fits. +The reduced model results in lower AIC and BIC values, so it +is clearly preferable. Using multiple starting values gives +a large improvement in case of the full model, because it is +less well-defined, which impedes convergence. For the reduced +model, using multiple starting values only results in a small +improvement of the model fit. -- cgit v1.2.1