From 0b754ffa91b9496bdd2f892cf3ca2bd887028dea Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Tue, 27 Jul 2021 18:22:01 +0200 Subject: Fix dimethenamid vignette problems and update docs --- vignettes/web_only/dimethenamid_2018.html | 109 ++++++++++++++++++++++++------ vignettes/web_only/dimethenamid_2018.rmd | 57 +++++++++++----- 2 files changed, 128 insertions(+), 38 deletions(-) (limited to 'vignettes') diff --git a/vignettes/web_only/dimethenamid_2018.html b/vignettes/web_only/dimethenamid_2018.html index e84a435c..df8200eb 100644 --- a/vignettes/web_only/dimethenamid_2018.html +++ b/vignettes/web_only/dimethenamid_2018.html @@ -1594,7 +1594,7 @@ div.tocify {

Example evaluations of the dimethenamid data from 2018

Johannes Ranke

-

Last change 23 June 2021, built on 25 Jun 2021

+

Last change 27 July 2021, built on 27 Jul 2021

@@ -1655,18 +1655,20 @@ f_parent_mkin_tc <- mmkin(c("SFO", "DFOP"), dmta_ds,

nlme

The nlme package was the first R extension providing facilities to fit nonlinear mixed-effects models. We would like to do model selection from all four combinations of degradation models and error models based on the AIC. However, fitting the DFOP model with constant variance and using default control parameters results in an error, signalling that the maximum number of 50 iterations was reached, potentially indicating overparameterisation. However, the algorithm converges when the two-component error model is used in combination with the DFOP model. This can be explained by the fact that the smaller residues observed at later sampling times get more weight when using the two-component error model, which will counteract the tendency of the algorithm to try parameter combinations unsuitable for fitting these data.

-
f_parent_nlme_sfo_const <- nlme(f_parent_mkin_const["SFO", ])
-#f_parent_nlme_dfop_const <- nlme(f_parent_mkin_const["DFOP", ]) # error
+
library(nlme)
+f_parent_nlme_sfo_const <- nlme(f_parent_mkin_const["SFO", ])
+#f_parent_nlme_dfop_const <- nlme(f_parent_mkin_const["DFOP", ])
+# maxIter = 50 reached
 f_parent_nlme_sfo_tc <- nlme(f_parent_mkin_tc["SFO", ])
 f_parent_nlme_dfop_tc <- nlme(f_parent_mkin_tc["DFOP", ])

Note that overparameterisation is also indicated by warnings obtained when fitting SFO or DFOP with the two-component error model (‘false convergence’ in the ‘LME step’ in some iterations). In addition to these fits, attempts were also made to include correlations between random effects by using the log Cholesky parameterisation of the matrix specifying them. The code used for these attempts can be made visible below.

f_parent_nlme_sfo_const_logchol <- nlme(f_parent_mkin_const["SFO", ],
   random = pdLogChol(list(DMTA_0 ~ 1, log_k_DMTA ~ 1)))
 anova(f_parent_nlme_sfo_const, f_parent_nlme_sfo_const_logchol) # not better
-f_parent_nlme_dfop_tc_logchol <- update(f_parent_nlme_dfop_tc,
-  random = pdLogChol(list(DMTA_0 ~ 1, log_k1 ~ 1, log_k2 ~ 1, g_qlogis ~ 1)))
+#f_parent_nlme_dfop_tc_logchol <- update(f_parent_nlme_dfop_tc,
+#  random = pdLogChol(list(DMTA_0 ~ 1, log_k1 ~ 1, log_k2 ~ 1, g_qlogis ~ 1)))
 # using log Cholesky parameterisation for random effects (nlme default) does
-# not converge and gives lots of warnings about the LME step not converging
+# not converge here and gives lots of warnings about the LME step not converging

The model comparison function of the nlme package can directly be applied to these fits, showing a similar goodness-of-fit of the SFO model, but a much lower AIC for the DFOP model fitted with the two-component error model. Also, the likelihood ratio test indicates that this difference is significant, as the p-value is below 0.0001.

anova(
   f_parent_nlme_sfo_const, f_parent_nlme_sfo_tc, f_parent_nlme_dfop_tc
@@ -1685,24 +1687,24 @@ f_parent_nlme_dfop_tc       3 10 687.84 718.59 -333.92 2 vs 3 140.771  <.0001
 

The corresponding SAEM fits of the four combinations of degradation and error models are fitted below. As there is no convergence criterion implemented in the saemix package, the convergence plots need to be manually checked for every fit.

The convergence plot for the SFO model using constant variance is shown below.

library(saemix)
-f_parent_saemix_sfo_const <- saem(f_parent_mkin_const["SFO", ], quiet = TRUE,
+f_parent_saemix_sfo_const <- mkin::saem(f_parent_mkin_const["SFO", ], quiet = TRUE,
   transformations = "saemix")
 plot(f_parent_saemix_sfo_const$so, plot.type = "convergence")

Obviously the default number of iterations is sufficient to reach convergence. This can also be said for the SFO fit using the two-component error model.

-
f_parent_saemix_sfo_tc <- saem(f_parent_mkin_tc["SFO", ], quiet = TRUE,
+
f_parent_saemix_sfo_tc <- mkin::saem(f_parent_mkin_tc["SFO", ], quiet = TRUE,
   transformations = "saemix")
 plot(f_parent_saemix_sfo_tc$so, plot.type = "convergence")

-

When fitting the DFOP model with constant variance, parameter convergence is not as unambiguous. Therefore, the number of iterations in the first phase of the algorithm was increased, leading to visually satisfying convergence.

-
f_parent_saemix_dfop_const <- saem(f_parent_mkin_const["DFOP", ], quiet = TRUE,
+

When fitting the DFOP model with constant variance, parameter convergence is not as unambiguous (see the failure of nlme with the default number of iterations above). Therefore, the number of iterations in the first phase of the algorithm was increased, leading to visually satisfying convergence.

+
f_parent_saemix_dfop_const <- mkin::saem(f_parent_mkin_const["DFOP", ], quiet = TRUE,
   control = saemixControl(nbiter.saemix = c(800, 200), print = FALSE,
     save = FALSE, save.graphs = FALSE, displayProgress = FALSE),
   transformations = "saemix")
 plot(f_parent_saemix_dfop_const$so, plot.type = "convergence")

-

The same applies to the case where the DFOP model is fitted with the two-component error model.

-
f_parent_saemix_dfop_tc_moreiter <- saem(f_parent_mkin_tc["DFOP", ], quiet = TRUE,
+

The same applies to the case where the DFOP model is fitted with the two-component error model. Convergence of the variance of k2 is enhanced by using the two-component error; it remains more or less stable already after 200 iterations of the first phase.

+
f_parent_saemix_dfop_tc_moreiter <- mkin::saem(f_parent_mkin_tc["DFOP", ], quiet = TRUE,
   control = saemixControl(nbiter.saemix = c(800, 200), print = FALSE,
     save = FALSE, save.graphs = FALSE, displayProgress = FALSE),
   transformations = "saemix")
@@ -1710,20 +1712,31 @@ plot(f_parent_saemix_dfop_tc_moreiter$so, plot.type = "convergence")

The four combinations can be compared using the model comparison function from the saemix package:

compare.saemix(f_parent_saemix_sfo_const$so, f_parent_saemix_sfo_tc$so,
-  f_parent_saemix_dfop_const$so, f_parent_saemix_dfop_tc$so)
+ f_parent_saemix_dfop_const$so, f_parent_saemix_dfop_tc_moreiter$so)
Likelihoods calculated by importance sampling
     AIC    BIC
 1 818.37 817.33
 2 820.38 819.14
 3 725.91 724.04
-4 688.09 686.01
+4 683.64 681.55

As in the case of nlme fits, the DFOP model fitted with two-component error (number 4) gives the lowest AIC. The numeric values are reasonably close to the ones obtained using nlme, considering that the algorithms for fitting the model and for the likelihood calculation are quite different.

+

In order to check the influence of the likelihood calculation algorithms implemented in saemix, the likelihood from Gaussian quadrature is added to the best fit, and the AIC values obtained from the three methods are compared.

+
f_parent_saemix_dfop_tc_moreiter$so <-
+  llgq.saemix(f_parent_saemix_dfop_tc_moreiter$so)
+AIC(f_parent_saemix_dfop_tc_moreiter$so)
+
[1] 683.64
+
AIC(f_parent_saemix_dfop_tc_moreiter$so, method = "gq")
+
[1] 683.7
+
AIC(f_parent_saemix_dfop_tc_moreiter$so, method = "lin")
+
[1] 683.17
+

The AIC values based on importance sampling and Gaussian quadrature are quite similar. Using linearisation is less accurate, but still gives a similar value.

nlmixr

In recent years, a lot of effort has been put into the nlmixr package, which is designed for pharmacokinetics, where nonlinear mixed-effects models are routinely used, but which can also be used for related data like chemical degradation data. A current development branch of the mkin package provides an interface between mkin and nlmixr. Here, we check if we get equivalent results when using a refined version of the First Order Conditional Estimation (FOCE) algorithm used in nlme, namely First Order Conditional Estimation with Interaction (FOCEI), and the SAEM algorithm as implemented in nlmixr.

First, the focei algorithm is used for the four model combinations and the goodness of fit of the results is compared.

-
f_parent_nlmixr_focei_sfo_const <- nlmixr(f_parent_mkin_const["SFO", ], est = "focei")
+
library(nlmixr)
+f_parent_nlmixr_focei_sfo_const <- nlmixr(f_parent_mkin_const["SFO", ], est = "focei")
 f_parent_nlmixr_focei_sfo_tc <- nlmixr(f_parent_mkin_tc["SFO", ], est = "focei")
 f_parent_nlmixr_focei_dfop_const <- nlmixr(f_parent_mkin_const["DFOP", ], est = "focei")
 f_parent_nlmixr_focei_dfop_tc<- nlmixr(f_parent_mkin_tc["DFOP", ], est = "focei")
@@ -1734,7 +1747,14 @@ f_parent_nlmixr_focei_sfo_const$nm 5 818.63 f_parent_nlmixr_focei_sfo_tc$nm 6 820.61 f_parent_nlmixr_focei_dfop_const$nm 9 728.11 f_parent_nlmixr_focei_dfop_tc$nm 10 687.82
-

The AIC values are very close to the ones obtained with nlme.

+

The AIC values are very close to the ones obtained with nlme, which are repeated below for convenience.

+
AIC(
+  f_parent_nlme_sfo_const, f_parent_nlme_sfo_tc, f_parent_nlme_dfop_tc
+)
+
                        df    AIC
+f_parent_nlme_sfo_const  5 818.63
+f_parent_nlme_sfo_tc     6 820.61
+f_parent_nlme_dfop_tc   10 687.84

Secondly, we use the SAEM estimation routine and check the convergence plots for SFO with constant variance

f_parent_nlmixr_saem_sfo_const <- nlmixr(f_parent_mkin_const["SFO", ], est = "saem",
   control = nlmixr::saemControl(logLik = TRUE))
@@ -1743,17 +1763,17 @@ traceplot(f_parent_nlmixr_saem_sfo_const$nm)

for SFO with two-component error

f_parent_nlmixr_saem_sfo_tc <- nlmixr(f_parent_mkin_tc["SFO", ], est = "saem",
   control = nlmixr::saemControl(logLik = TRUE))
-nlmixr::traceplot(f_parent_nlmixr_saem_sfo_tc$nm)
+traceplot(f_parent_nlmixr_saem_sfo_tc$nm)

For DFOP with constant variance, the convergence plots show considerable instability of the fit, which can be alleviated by increasing the number of iterations and the number of parallel chains for the first phase of the algorithm.

f_parent_nlmixr_saem_dfop_const <- nlmixr(f_parent_mkin_const["DFOP", ], est = "saem",
   control = nlmixr::saemControl(logLik = TRUE, nBurn = 1000), nmc = 15)
-nlmixr::traceplot(f_parent_nlmixr_saem_dfop_const$nm)
+traceplot(f_parent_nlmixr_saem_dfop_const$nm)

For DFOP with two-component error, the same increase in iterations and parallel chains was used, but using the two-component error appears to lead to a less erratic convergence, so this may not be necessary to this degree.

f_parent_nlmixr_saem_dfop_tc <- nlmixr(f_parent_mkin_tc["DFOP", ], est = "saem",
   control = nlmixr::saemControl(logLik = TRUE, nBurn = 1000, nmc = 15))
-nlmixr::traceplot(f_parent_nlmixr_saem_dfop_tc$nm)
+traceplot(f_parent_nlmixr_saem_dfop_tc$nm)

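The AIC values are internally calculated using Gaussian quadrature. For an unknown reason, the AIC value obtained for the DFOP fit using the two-component error model was given as Infinity in an earlier build of this document; in the current build a finite value is obtained, as shown in the output below.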

AIC(f_parent_nlmixr_saem_sfo_const$nm, f_parent_nlmixr_saem_sfo_tc$nm,
@@ -1761,8 +1781,55 @@ nlmixr::traceplot(f_parent_nlmixr_saem_dfop_tc$nm)
                                   df    AIC
 f_parent_nlmixr_saem_sfo_const$nm   5 820.54
 f_parent_nlmixr_saem_sfo_tc$nm      6 835.26
-f_parent_nlmixr_saem_dfop_const$nm  9 850.72
-f_parent_nlmixr_saem_dfop_tc$nm    10    Inf
+f_parent_nlmixr_saem_dfop_const$nm 9 842.84 +f_parent_nlmixr_saem_dfop_tc$nm 10 684.51 +

The following table gives the AIC values obtained with the three packages.

+
AIC_all <- data.frame(
+  nlme = c(AIC(f_parent_nlme_sfo_const), AIC(f_parent_nlme_sfo_tc), NA, AIC(f_parent_nlme_dfop_tc)),
+  nlmixr_focei = sapply(list(f_parent_nlmixr_focei_sfo_const$nm, f_parent_nlmixr_focei_sfo_tc$nm,
+  f_parent_nlmixr_focei_dfop_const$nm, f_parent_nlmixr_focei_dfop_tc$nm), AIC),
+  saemix = sapply(list(f_parent_saemix_sfo_const$so, f_parent_saemix_sfo_tc$so,
+    f_parent_saemix_dfop_const$so, f_parent_saemix_dfop_tc_moreiter$so), AIC),
+  nlmixr_saem = sapply(list(f_parent_nlmixr_saem_sfo_const$nm, f_parent_nlmixr_saem_sfo_tc$nm,
+  f_parent_nlmixr_saem_dfop_const$nm, f_parent_nlmixr_saem_dfop_tc$nm), AIC)
+)
+kable(AIC_all)
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
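| nlme   | nlmixr_focei | saemix | nlmixr_saem |
|--------|--------------|--------|-------------|
| 818.63 | 818.63       | 818.37 | 820.54      |
| 820.61 | 820.61       | 820.38 | 835.26      |
| NA     | 728.11       | 725.91 | 842.84      |
| 687.84 | 687.82       | 683.64 | 684.51      |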
diff --git a/vignettes/web_only/dimethenamid_2018.rmd b/vignettes/web_only/dimethenamid_2018.rmd index d3541a34..30325044 100644 --- a/vignettes/web_only/dimethenamid_2018.rmd +++ b/vignettes/web_only/dimethenamid_2018.rmd @@ -1,7 +1,7 @@ --- title: Example evaluations of the dimethenamid data from 2018 author: Johannes Ranke -date: Last change 23 June 2021, built on `r format(Sys.Date(), format = "%d %b %Y")` +date: Last change 27 July 2021, built on `r format(Sys.Date(), format = "%d %b %Y")` output: html_document: toc: true @@ -163,8 +163,10 @@ tendency of the algorithm to try parameter combinations unsuitable for fitting these data. ```{r f_parent_nlme, warning = FALSE} +library(nlme) f_parent_nlme_sfo_const <- nlme(f_parent_mkin_const["SFO", ]) -#f_parent_nlme_dfop_const <- nlme(f_parent_mkin_const["DFOP", ]) # error +#f_parent_nlme_dfop_const <- nlme(f_parent_mkin_const["DFOP", ]) +# maxIter = 50 reached f_parent_nlme_sfo_tc <- nlme(f_parent_mkin_tc["SFO", ]) f_parent_nlme_dfop_tc <- nlme(f_parent_mkin_tc["DFOP", ]) ``` @@ -180,10 +182,10 @@ used for these attempts can be made visible below. 
f_parent_nlme_sfo_const_logchol <- nlme(f_parent_mkin_const["SFO", ], random = pdLogChol(list(DMTA_0 ~ 1, log_k_DMTA ~ 1))) anova(f_parent_nlme_sfo_const, f_parent_nlme_sfo_const_logchol) # not better -f_parent_nlme_dfop_tc_logchol <- update(f_parent_nlme_dfop_tc, - random = pdLogChol(list(DMTA_0 ~ 1, log_k1 ~ 1, log_k2 ~ 1, g_qlogis ~ 1))) +#f_parent_nlme_dfop_tc_logchol <- update(f_parent_nlme_dfop_tc, +# random = pdLogChol(list(DMTA_0 ~ 1, log_k1 ~ 1, log_k2 ~ 1, g_qlogis ~ 1))) # using log Cholesky parameterisation for random effects (nlme default) does -# not converge and gives lots of warnings about the LME step not converging +# not converge here and gives lots of warnings about the LME step not converging ``` The model comparison function of the nlme package can directly be applied @@ -221,7 +223,7 @@ The convergence plot for the SFO model using constant variance is shown below. ```{r f_parent_saemix_sfo_const, results = 'hide'} library(saemix) -f_parent_saemix_sfo_const <- saem(f_parent_mkin_const["SFO", ], quiet = TRUE, +f_parent_saemix_sfo_const <- mkin::saem(f_parent_mkin_const["SFO", ], quiet = TRUE, transformations = "saemix") plot(f_parent_saemix_sfo_const$so, plot.type = "convergence") ``` @@ -230,18 +232,19 @@ Obviously the default number of iterations is sufficient to reach convergence. This can also be said for the SFO fit using the two-component error model. ```{r f_parent_saemix_sfo_tc, results = 'hide'} -f_parent_saemix_sfo_tc <- saem(f_parent_mkin_tc["SFO", ], quiet = TRUE, +f_parent_saemix_sfo_tc <- mkin::saem(f_parent_mkin_tc["SFO", ], quiet = TRUE, transformations = "saemix") plot(f_parent_saemix_sfo_tc$so, plot.type = "convergence") ``` When fitting the DFOP model with constant variance, parameter convergence -is not as unambiguous. Therefore, the number of iterations in the first +is not as unambiguous (see the failure of nlme with the default number of +iterations above). 
Therefore, the number of iterations in the first phase of the algorithm was increased, leading to visually satisfying convergence. ```{r f_parent_saemix_dfop_const, results = 'hide'} -f_parent_saemix_dfop_const <- saem(f_parent_mkin_const["DFOP", ], quiet = TRUE, +f_parent_saemix_dfop_const <- mkin::saem(f_parent_mkin_const["DFOP", ], quiet = TRUE, control = saemixControl(nbiter.saemix = c(800, 200), print = FALSE, save = FALSE, save.graphs = FALSE, displayProgress = FALSE), transformations = "saemix") @@ -250,11 +253,11 @@ plot(f_parent_saemix_dfop_const$so, plot.type = "convergence") The same applies to the case where the DFOP model is fitted with the two-component error model. Convergence of the variance of k2 is enhanced -by using the two-component error, it remains pretty stable already after 200 +by using the two-component error, it remains more or less stable already after 200 iterations of the first phase. ```{r f_parent_saemix_dfop_tc_moreiter, results = 'hide'} -f_parent_saemix_dfop_tc_moreiter <- saem(f_parent_mkin_tc["DFOP", ], quiet = TRUE, +f_parent_saemix_dfop_tc_moreiter <- mkin::saem(f_parent_mkin_tc["DFOP", ], quiet = TRUE, control = saemixControl(nbiter.saemix = c(800, 200), print = FALSE, save = FALSE, save.graphs = FALSE, displayProgress = FALSE), transformations = "saemix") @@ -306,6 +309,7 @@ First, the focei algorithm is used for the four model combinations and the goodness of fit of the results is compared. 
```{r f_parent_nlmixr_focei, results = "hide", message = FALSE, warning = FALSE} +library(nlmixr) f_parent_nlmixr_focei_sfo_const <- nlmixr(f_parent_mkin_const["SFO", ], est = "focei") f_parent_nlmixr_focei_sfo_tc <- nlmixr(f_parent_mkin_tc["SFO", ], est = "focei") f_parent_nlmixr_focei_dfop_const <- nlmixr(f_parent_mkin_const["DFOP", ], est = "focei") @@ -317,7 +321,14 @@ AIC(f_parent_nlmixr_focei_sfo_const$nm, f_parent_nlmixr_focei_sfo_tc$nm, f_parent_nlmixr_focei_dfop_const$nm, f_parent_nlmixr_focei_dfop_tc$nm) ``` -The AIC values are very close to the ones obtained with nlme. +The AIC values are very close to the ones obtained with nlme which are repeated below +for convenience. + +```{r AIC_parent_nlme_rep} +AIC( + f_parent_nlme_sfo_const, f_parent_nlme_sfo_tc, f_parent_nlme_dfop_tc +) +``` Secondly, we use the SAEM estimation routine and check the convergence plots for SFO with constant variance @@ -333,7 +344,7 @@ for SFO with two-component error ```{r f_parent_nlmixr_saem_sfo_tc, results = "hide", warning = FALSE, message = FALSE} f_parent_nlmixr_saem_sfo_tc <- nlmixr(f_parent_mkin_tc["SFO", ], est = "saem", control = nlmixr::saemControl(logLik = TRUE)) -nlmixr::traceplot(f_parent_nlmixr_saem_sfo_tc$nm) +traceplot(f_parent_nlmixr_saem_sfo_tc$nm) ``` For DFOP with constant variance, the convergence plots show considerable instability @@ -343,7 +354,7 @@ the number of parallel chains for the first phase of algorithm. ```{r f_parent_nlmixr_saem_dfop_const, results = "hide", warning = FALSE, message = FALSE} f_parent_nlmixr_saem_dfop_const <- nlmixr(f_parent_mkin_const["DFOP", ], est = "saem", control = nlmixr::saemControl(logLik = TRUE, nBurn = 1000), nmc = 15) -nlmixr::traceplot(f_parent_nlmixr_saem_dfop_const$nm) +traceplot(f_parent_nlmixr_saem_dfop_const$nm) ``` For DFOP with two-component error, the same increase in iterations and parallel @@ -354,7 +365,7 @@ erratic convergence, so this may not be necessary to this degree. 
```{r f_parent_nlmixr_saem_dfop_tc, results = "hide", warning = FALSE, message = FALSE} f_parent_nlmixr_saem_dfop_tc <- nlmixr(f_parent_mkin_tc["DFOP", ], est = "saem", control = nlmixr::saemControl(logLik = TRUE, nBurn = 1000, nmc = 15)) -nlmixr::traceplot(f_parent_nlmixr_saem_dfop_tc$nm) +traceplot(f_parent_nlmixr_saem_dfop_tc$nm) ``` The AIC values are internally calculated using Gaussian quadrature. For an @@ -366,8 +377,20 @@ AIC(f_parent_nlmixr_saem_sfo_const$nm, f_parent_nlmixr_saem_sfo_tc$nm, f_parent_nlmixr_saem_dfop_const$nm, f_parent_nlmixr_saem_dfop_tc$nm) ``` - - +The following table gives the AIC values obtained with the three packages. + +```{r AIC_all} +AIC_all <- data.frame( + nlme = c(AIC(f_parent_nlme_sfo_const), AIC(f_parent_nlme_sfo_tc), NA, AIC(f_parent_nlme_dfop_tc)), + nlmixr_focei = sapply(list(f_parent_nlmixr_focei_sfo_const$nm, f_parent_nlmixr_focei_sfo_tc$nm, + f_parent_nlmixr_focei_dfop_const$nm, f_parent_nlmixr_focei_dfop_tc$nm), AIC), + saemix = sapply(list(f_parent_saemix_sfo_const$so, f_parent_saemix_sfo_tc$so, + f_parent_saemix_dfop_const$so, f_parent_saemix_dfop_tc_moreiter$so), AIC), + nlmixr_saem = sapply(list(f_parent_nlmixr_saem_sfo_const$nm, f_parent_nlmixr_saem_sfo_tc$nm, + f_parent_nlmixr_saem_dfop_const$nm, f_parent_nlmixr_saem_dfop_tc$nm), AIC) +) +kable(AIC_all) +``` # References -- cgit v1.2.1