From f35e0b3d3b9f41bee2f5cc357afcb69e3aadad15 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Fri, 8 Jul 2022 17:39:44 +0200 Subject: Store DLL info in mkinmod objects for performance Thanks to Tomas Kalibera for his analysis of the problem on the r-package-devel mailing list and for the suggestion on how to fix it. See the current benchmark vignette for the new data on mkin 1.1.1 with R 4.2.1, with unprecedented performance. --- docs/articles/web_only/dimethenamid_2018.html | 92 ++++++++++++++------------- 1 file changed, 47 insertions(+), 45 deletions(-) (limited to 'docs/articles/web_only/dimethenamid_2018.html') diff --git a/docs/articles/web_only/dimethenamid_2018.html b/docs/articles/web_only/dimethenamid_2018.html index 25fd9f9e..b020a7b0 100644 --- a/docs/articles/web_only/dimethenamid_2018.html +++ b/docs/articles/web_only/dimethenamid_2018.html @@ -33,7 +33,7 @@ mkin - 1.1.0 + 1.1.1 @@ -105,7 +105,7 @@

Example evaluations of the dimethenamid data from 2018

Johannes Ranke

-

Last change 1 July 2022, built on 01 Jul 2022

+

Last change 1 July 2022, built on 08 Jul 2022

Source: vignettes/web_only/dimethenamid_2018.rmd @@ -178,7 +178,7 @@ Status of individual fits: dataset model Calke Borstel Flaach BBA 2.2 BBA 2.3 Elliot - DFOP OK OK C OK C OK + DFOP OK OK OK OK C OK OK: No warnings C: Optimisation did not converge: @@ -286,21 +286,23 @@ DMTA_0 97.99583 96.50079 99.4909 k1 0.06377 0.03432 0.0932 k2 0.00848 0.00444 0.0125 g 0.95701 0.91313 1.0009 -a.1 1.82141 1.65974 1.9831 -SD.DMTA_0 1.64787 0.45779 2.8379 +a.1 1.82141 1.60516 2.0377 +SD.DMTA_0 1.64787 0.45729 2.8384 SD.k1 0.57439 0.24731 0.9015 -SD.k2 0.03296 -2.50143 2.5673 -SD.g 1.10266 0.32371 1.8816 +SD.k2 0.03296 -2.50524 2.5712 +SD.g 1.10266 0.32354 1.8818

While the other parameters converge to credible values, the variance of k2 (omega2.k2) converges to a very small value. The printout of the saem.mmkin model shows that the estimated standard deviation of k2 across the population of soils (SD.k2) is ill-defined, indicating overparameterisation of this model.

When the DFOP model is fitted with the two-component error model, we also observe that the estimated variance of k2 becomes very small, while being ill-defined, as illustrated by the excessive confidence interval of SD.k2.

 f_parent_saemix_dfop_tc <- mkin::saem(f_parent_mkin_tc["DFOP", ], quiet = TRUE,
   control = saemix_control, transformations = "saemix")
 f_parent_saemix_dfop_tc_moreiter <- mkin::saem(f_parent_mkin_tc["DFOP", ], quiet = TRUE,
-  control = saemix_control_moreiter, transformations = "saemix")
-plot(f_parent_saemix_dfop_tc$so, plot.type = "convergence")
+ control = saemix_control_moreiter, transformations = "saemix") +
Likelihood cannot be computed by Importance Sampling.
+
+plot(f_parent_saemix_dfop_tc$so, plot.type = "convergence")

-
+
 print(f_parent_saemix_dfop_tc)
Kinetic nonlinear mixed-effects model fit by SAEM
 Structural model:
@@ -316,21 +318,21 @@ Likelihood computed by importance sampling
   666 664   -323
 
 Fitted parameters:
-          estimate    lower    upper
-DMTA_0    98.27617  96.3088 100.2436
-k1         0.06437   0.0337   0.0950
-k2         0.00880   0.0063   0.0113
-g          0.95249   0.9100   0.9949
-a.1        1.06161   0.8625   1.2607
-b.1        0.02967   0.0226   0.0367
-SD.DMTA_0  2.06075   0.4187   3.7028
-SD.k1      0.59357   0.2561   0.9310
-SD.k2      0.00292 -10.2960  10.3019
-SD.g       1.05725   0.3808   1.7337
+ estimate lower upper +DMTA_0 9.82e+01 96.27937 100.1783 +k1 6.41e-02 0.03333 0.0948 +k2 8.56e-03 0.00608 0.0110 +g 9.55e-01 0.91440 0.9947 +a.1 1.07e+00 0.86542 1.2647 +b.1 2.96e-02 0.02258 0.0367 +SD.DMTA_0 2.04e+00 0.40629 3.6678 +SD.k1 5.98e-01 0.25796 0.9373 +SD.k2 5.28e-04 -58.93251 58.9336 +SD.g 1.04e+00 0.36509 1.7083

Doubling the number of iterations in the first phase of the algorithm leads to a slightly lower likelihood, and therefore to slightly higher AIC and BIC values. With even more iterations, the algorithm stops with an error message. This is related to the variance of k2 approximating zero and has been submitted as a bug to the saemix package, as the algorithm does not converge in this case.

An alternative way to fit DFOP in combination with the two-component error model is to use the model formulation with transformed parameters as used per default in mkin. When using this option, convergence is slower, but eventually the algorithm stops as well with the same error message.

The four combinations (SFO/const, SFO/tc, DFOP/const and DFOP/tc) and the version with increased iterations can be compared using the model comparison function of the saemix package:

-
+
 AIC_parent_saemix <- saemix::compare.saemix(
   f_parent_saemix_sfo_const$so,
   f_parent_saemix_sfo_tc$so,
@@ -338,7 +340,7 @@ SD.g       1.05725   0.3808   1.7337
f_parent_saemix_dfop_tc$so, f_parent_saemix_dfop_tc_moreiter$so)
Likelihoods calculated by importance sampling
-
+
 rownames(AIC_parent_saemix) <- c(
   "SFO const", "SFO tc", "DFOP const", "DFOP tc", "DFOP tc more iterations")
 print(AIC_parent_saemix)
@@ -346,10 +348,10 @@ SD.g 1.05725 0.3808 1.7337
SFO const 796.38 795.34 SFO tc 798.38 797.13 DFOP const 705.75 703.88 -DFOP tc 665.65 663.57 -DFOP tc more iterations 665.88 663.80 +DFOP tc 665.72 663.63 +DFOP tc more iterations NaN NaN

In order to check the influence of the likelihood calculation algorithms implemented in saemix, the likelihood from Gaussian quadrature is added to the best fit, and the AIC values obtained from the three methods are compared.

-
+
 f_parent_saemix_dfop_tc$so <-
   saemix::llgq.saemix(f_parent_saemix_dfop_tc$so)
 AIC_parent_saemix_methods <- c(
@@ -359,11 +361,11 @@ DFOP tc more iterations 665.88 663.80
) print(AIC_parent_saemix_methods)
    is     gq    lin 
-665.65 665.68 665.11 
+665.72 665.88 665.15

The AIC values based on importance sampling and Gaussian quadrature are very similar. Using linearisation is known to be less accurate, but still gives a similar value.

In order to illustrate that the comparison of the three method depends on the degree of convergence obtained in the fit, the same comparison is shown below for the fit using the defaults for the number of iterations and the number of MCMC chains.

When using OpenBlas for linear algebra, there is a large difference in the values obtained with Gaussian quadrature, so the larger number of iterations makes a lot of difference. When using the LAPACK version coming with Debian Bullseye, the AIC based on Gaussian quadrature is almost the same as the one obtained with the other methods, also when using defaults for the fit.

-
+
 f_parent_saemix_dfop_tc_defaults <- mkin::saem(f_parent_mkin_tc["DFOP", ])
 f_parent_saemix_dfop_tc_defaults$so <-
   saemix::llgq.saemix(f_parent_saemix_dfop_tc_defaults$so)
@@ -374,14 +376,14 @@ DFOP tc more iterations 665.88 663.80
) print(AIC_parent_saemix_methods_defaults)
    is     gq    lin 
-668.27 718.36 666.49 
+668.91 663.61 667.40

Comparison

The following table gives the AIC values obtained with both backend packages using the same control parameters (800 iterations burn-in, 300 iterations second phase, 15 chains).

-
+
 AIC_all <- data.frame(
   check.names = FALSE,
   "Degradation model" = c("SFO", "SFO", "DFOP", "DFOP"),
@@ -406,7 +408,7 @@ DFOP tc more iterations 665.88 663.80
SFO const 796.60 -796.60 +794.17 796.38 @@ -420,15 +422,15 @@ DFOP tc more iterations 665.88 663.80
DFOP const NA -671.98 +704.95 705.75 DFOP tc 671.91 -665.11 -665.65 +665.15 +665.72 @@ -443,15 +445,15 @@ DFOP tc more iterations 665.88 663.80

Session Info

-
+
 
R version 4.2.1 (2022-06-23)
 Platform: x86_64-pc-linux-gnu (64-bit)
 Running under: Debian GNU/Linux 11 (bullseye)
 
 Matrix products: default
-BLAS:   /usr/lib/x86_64-linux-gnu/openblas-serial/libblas.so.3
-LAPACK: /usr/lib/x86_64-linux-gnu/openblas-serial/libopenblas-r0.3.13.so
+BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
+LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
 
 locale:
  [1] LC_CTYPE=de_DE.UTF-8       LC_NUMERIC=C              
@@ -466,24 +468,24 @@ attached base packages:
 [8] base     
 
 other attached packages:
-[1] saemix_3.0   npde_3.2     nlme_3.1-158 mkin_1.1.0   knitr_1.39  
+[1] nlme_3.1-158 mkin_1.1.1   knitr_1.39  
 
 loaded via a namespace (and not attached):
  [1] deSolve_1.32      zoo_1.8-10        tidyselect_1.1.2  xfun_0.31        
  [5] bslib_0.3.1       purrr_0.3.4       lattice_0.20-45   colorspace_2.0-3 
- [9] vctrs_0.4.1       generics_0.1.2    htmltools_0.5.2   yaml_2.3.5       
-[13] utf8_1.2.2        rlang_1.0.3       pkgdown_2.0.5     jquerylib_0.1.4  
-[17] pillar_1.7.0      glue_1.6.2        DBI_1.1.3         lifecycle_1.0.1  
+ [9] vctrs_0.4.1       generics_0.1.3    htmltools_0.5.2   yaml_2.3.5       
+[13] utf8_1.2.2        rlang_1.0.3       pkgdown_2.0.5     saemix_3.0       
+[17] jquerylib_0.1.4   pillar_1.7.0      glue_1.6.2        lifecycle_1.0.1  
 [21] stringr_1.4.0     munsell_0.5.0     gtable_0.3.0      ragg_1.2.2       
-[25] codetools_0.2-18  memoise_2.0.1     evaluate_0.15     fastmap_1.1.0    
+[25] memoise_2.0.1     evaluate_0.15     npde_3.2          fastmap_1.1.0    
 [29] lmtest_0.9-40     fansi_1.0.3       highr_0.9         scales_1.2.0     
 [33] cachem_1.0.6      desc_1.4.1        jsonlite_1.8.0    systemfonts_1.0.4
-[37] fs_1.5.2          gridExtra_2.3     textshaping_0.3.6 ggplot2_3.3.6    
+[37] fs_1.5.2          textshaping_0.3.6 gridExtra_2.3     ggplot2_3.3.6    
 [41] digest_0.6.29     stringi_1.7.6     dplyr_1.0.9       grid_4.2.1       
 [45] rprojroot_2.0.3   cli_3.3.0         tools_4.2.1       magrittr_2.0.3   
 [49] sass_0.4.1        tibble_3.1.7      crayon_1.5.1      pkgconfig_2.0.3  
-[53] ellipsis_0.3.2    assertthat_0.2.1  rmarkdown_2.14    mclust_5.4.10    
-[57] R6_2.5.1          compiler_4.2.1   
+[53] ellipsis_0.3.2 rmarkdown_2.14 R6_2.5.1 mclust_5.4.10 +[57] compiler_4.2.1

References -- cgit v1.2.1