From 76a0aae725f4d603b3c8e8442bb67081891986b4 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Thu, 16 Nov 2017 14:55:06 +0100 Subject: Add the code used for generating synthetic_data_for_UBA Static documentation except articles rebuilt by pkgdown --- DESCRIPTION | 4 +- NEWS.md | 4 + R/add_err.R | 6 +- build.log | 7 +- docs/authors.html | 7 +- docs/index.html | 13 +- docs/news/index.html | 22 ++- docs/reference/add_err.html | 13 +- docs/reference/index.html | 9 +- docs/reference/synthetic_data_for_UBA.html | 290 +++++++++++------------------ man/add_err.Rd | 5 +- man/synthetic_data_for_UBA.Rd | 82 +++++++- 12 files changed, 227 insertions(+), 235 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a86eeadd..d6de2e12 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: mkin Type: Package Title: Kinetic Evaluation of Chemical Degradation Data -Version: 0.9.46.2 -Date: 2017-10-10 +Version: 0.9.46.3 +Date: 2017-11-16 Authors@R: c(person("Johannes", "Ranke", role = c("aut", "cre", "cph"), email = "jranke@uni-bremen.de", comment = c(ORCID = "0000-0003-4371-6538")), diff --git a/NEWS.md b/NEWS.md index 32c8356b..dab95926 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# mkin 0.9.46.3 (2017-11-16) + +- `synthetic_data_for_UBA`: Add the code used to generate the data in the interest of reproducibility + # mkin 0.9.46.2 (2017-10-10) - Converted the vignette FOCUS_Z from tex/pdf to markdown/html diff --git a/R/add_err.R b/R/add_err.R index 4d998e94..3b98338d 100644 --- a/R/add_err.R +++ b/R/add_err.R @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2016 Johannes Ranke +# Copyright (C) 2015-2017 Johannes Ranke # Contact: jranke@uni-bremen.de # This file is part of the R package mkin @@ -16,7 +16,7 @@ # You should have received a copy of the GNU General Public License along with # this program. If not, see -add_err = function(prediction, sdfunc, +add_err = function(prediction, sdfunc, secondary = c("M1", "M2"), n = 1000, LOD = 0.1, reps = 2, digits = 1, seed = NA) { @@ -33,7 +33,7 @@ add_err = function(prediction, sdfunc, d_rep = data.frame(lapply(d_long, rep, each = 2)) d_rep$value = rnorm(length(d_rep$value), d_rep$value, sdfunc(d_rep$value)) - d_rep[d_rep$time == 0 & d_rep$name %in% c("M1", "M2"), "value"] <- 0 + d_rep[d_rep$time == 0 & d_rep$name %in% secondary, "value"] <- 0 # Set values below the LOD to NA d_NA <- transform(d_rep, value = ifelse(value < LOD, NA, value)) diff --git a/build.log b/build.log index 674097ea..a02e93ae 100644 --- a/build.log +++ b/build.log @@ -2,9 +2,4 @@ * preparing ‘mkin’: * checking DESCRIPTION meta-information ... OK * installing the package to build vignettes -* creating vignettes ... OK -* checking for LF line-endings in source and make files and shell scripts -* checking for empty or unneeded directories -* looking to see if a ‘data/datalist’ file should be added -* building ‘mkin_0.9.46.2.tar.gz’ - +* creating vignettes ... \ No newline at end of file diff --git a/docs/authors.html b/docs/authors.html index 5829abb8..7f3918c8 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -86,12 +86,7 @@ diff --git a/docs/index.html b/docs/index.html index 1b4d89a6..f228bbe2 100644 --- a/docs/index.html +++ b/docs/index.html @@ -62,14 +62,7 @@ News - + @@ -113,7 +106,7 @@
  • Highly flexible model specification using mkinmod, including equilibrium reactions and using the single first-order reversible binding (SFORB) model, which will automatically create two latent state variables for the observed variable.
  • As of version 0.9-39, fitting of several models to several datasets, optionally in parallel, is supported, see for example plot.mmkin.
  • Model solution (forward modelling) in the function mkinpredict is performed either using the analytical solution for the case of parent only degradation, an eigenvalue based solution if only simple first-order (SFO) or SFORB kinetics are used in the model, or using a numeric solver from the deSolve package (default is lsoda).
  • -
  • If a C compiler is installed, the kinetic models are compiled from automatically generated C code, see
    vignette compiled_models. The autogeneration of C code was inspired by the ccSolve package. Thanks to Karline Soetaert for her work on that.
  • +
  • If a C compiler is installed, the kinetic models are compiled from automatically generated C code, see vignette compiled_models. The autogeneration of C code was inspired by the ccSolve package. Thanks to Karline Soetaert for her work on that.
  • By default, kinetic rate constants and kinetic formation fractions are transformed internally using transform_odeparms so their estimators can more reasonably be expected to follow a normal distribution. This has the side effect that no constraints are needed in the optimisation. Thanks to René Lehmann for the nice cooperation on this, especially the isometric logration transformation that is now used for the formation fractions.
  • A side effect of this is that when parameter estimates are backtransformed to match the model definition, confidence intervals calculated from standard errors are also backtransformed to the correct scale, and will not include meaningless values like negative rate constants or formation fractions adding up to more than 1, which can not occur in a single experiment with a single defined radiolabel position.
  • The usual one-sided t-test for significant difference from zero is nevertheless shown based on estimators for the untransformed parameters.
  • @@ -158,8 +151,6 @@ diff --git a/docs/news/index.html b/docs/news/index.html index c3c66b9a..74b42514 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -86,12 +86,7 @@ @@ -108,9 +103,17 @@
    -
    +
    +

    +mkin 0.9.46.3 (2017-11-16)

    +
      +
    • +synthetic_data_for_UBA: Add the code used to generate the data in the interest of reproducibility
    • +
    +
    +

    -mkin 0.9.46.2

    +mkin 0.9.46.2 (2017-10-10)
    • Converted the vignette FOCUS_Z from tex/pdf to markdown/html

    • DESCRIPTION: Add ORCID

    • @@ -575,7 +578,8 @@

      Contents

    @@ -112,7 +107,7 @@ may depend on the predicted value and is specified as a standard deviation.

    -
    add_err(prediction, sdfunc,
    +    
    add_err(prediction, sdfunc, secondary = c("M1", "M2"),
               n = 1000, LOD = 0.1, reps = 2,
               digits = 1, seed = NA)
    @@ -129,6 +124,10 @@ a standard deviation that should be used for generating the random error terms for this value.

    + + secondary +

    The names of state variables that should have an initial value of zero

    + n

    The number of datasets to be generated.

    diff --git a/docs/reference/index.html b/docs/reference/index.html index 511a6aaa..dbcd5ba0 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -86,12 +86,7 @@
    @@ -105,7 +100,7 @@ diff --git a/docs/reference/synthetic_data_for_UBA.html b/docs/reference/synthetic_data_for_UBA.html index 173af92e..9ff18876 100644 --- a/docs/reference/synthetic_data_for_UBA.html +++ b/docs/reference/synthetic_data_for_UBA.html @@ -69,6 +69,9 @@
  • Example evaluation of FOCUS Laboratory Data L1 to L3
  • +
  • + Example evaluation of FOCUS Example Dataset Z +
  • Performance benefit by using compiled model definitions in mkin
  • @@ -83,12 +86,7 @@ @@ -109,7 +107,7 @@ two sequential or two parallel metabolites.

    Variance component 'a' is based on a normal distribution with standard deviation of 3, Variance component 'b' is also based on a normal distribution, but with a standard deviation of 7. - Variance component 'c' is based on the error model from Rocke and Lorenzato (1995), with the + Variance component 'c' is based on the error model from Rocke and Lorenzato (1995), with the minimum standard deviation (for small y values) of 0.5, and a proportionality constant of 0.07 for the increase of the standard deviation with y.

    Initial concentrations for metabolites and all values where adding the variance component resulted @@ -133,184 +131,116 @@

    Source

    Ranke (2014) Prüfung und Validierung von Modellierungssoftware als Alternative - zu ModelMaker 4.0, Umweltbundesamt Projektnummer 27452 - Rocke, David M. und Lorenzato, Stefan (1995) A two-component model for + zu ModelMaker 4.0, Umweltbundesamt Projektnummer 27452

    +

    Rocke, David M. und Lorenzato, Stefan (1995) A two-component model for measurement error in analytical chemistry. Technometrics 37(2), 176-184.

    Examples

    -m_synth_SFO_lin <- mkinmod(parent = list(type = "SFO", to = "M1"), - M1 = list(type = "SFO", to = "M2"), - M2 = list(type = "SFO"), use_of_ff = "max")
    #> Successfully compiled differential equation model from auto-generated C code.
    - -m_synth_SFO_par <- mkinmod(parent = list(type = "SFO", to = c("M1", "M2"), - sink = FALSE), - M1 = list(type = "SFO"), - M2 = list(type = "SFO"), use_of_ff = "max")
    #> Successfully compiled differential equation model from auto-generated C code.
    -m_synth_DFOP_lin <- mkinmod(parent = list(type = "DFOP", to = "M1"), - M1 = list(type = "SFO", to = "M2"), - M2 = list(type = "SFO"), use_of_ff = "max")
    #> Successfully compiled differential equation model from auto-generated C code.
    -m_synth_DFOP_par <- mkinmod(parent = list(type = "DFOP", to = c("M1", "M2"), - sink = FALSE), - M1 = list(type = "SFO"), - M2 = list(type = "SFO"), use_of_ff = "max")
    #> Successfully compiled differential equation model from auto-generated C code.
    -fit <- mkinfit(m_synth_SFO_lin, synthetic_data_for_UBA_2014[[1]]$data, quiet = TRUE) -plot_sep(fit)
    summary(fit)
    #> mkin version: 0.9.46 -#> R version: 3.4.1 -#> Date of fit: Sat Jul 29 15:15:33 2017 -#> Date of summary: Sat Jul 29 15:15:34 2017 -#> -#> Equations: -#> d_parent/dt = - k_parent * parent -#> d_M1/dt = + f_parent_to_M1 * k_parent * parent - k_M1 * M1 -#> d_M2/dt = + f_M1_to_M2 * k_M1 * M1 - k_M2 * M2 -#> -#> Model predictions using solution type deSolve -#> -#> Fitted with method Port using 381 model solutions performed in 2.241 s -#> -#> Weighting: none -#> -#> Starting values for parameters to be optimised: -#> value type -#> parent_0 101.3500 state -#> k_parent 0.1000 deparm -#> k_M1 0.1001 deparm -#> k_M2 0.1002 deparm -#> f_parent_to_M1 0.5000 deparm -#> f_M1_to_M2 0.5000 deparm -#> -#> Starting values for the transformed parameters actually optimised: -#> value lower upper -#> parent_0 101.350000 -Inf Inf -#> log_k_parent -2.302585 -Inf Inf -#> log_k_M1 -2.301586 -Inf Inf -#> log_k_M2 -2.300587 -Inf Inf -#> f_parent_ilr_1 0.000000 -Inf Inf -#> f_M1_ilr_1 0.000000 -Inf Inf -#> -#> Fixed parameter values: -#> value type -#> M1_0 0 state -#> M2_0 0 state -#> -#> Optimised, transformed parameters with symmetric confidence intervals: -#> Estimate Std. Error Lower Upper -#> parent_0 102.1000 1.71400 98.5800 105.5000 -#> log_k_parent -0.3020 0.04294 -0.3894 -0.2147 -#> log_k_M1 -1.2070 0.07599 -1.3610 -1.0520 -#> log_k_M2 -3.9010 0.06952 -4.0420 -3.7590 -#> f_parent_ilr_1 0.8492 0.18090 0.4812 1.2170 -#> f_M1_ilr_1 0.6780 0.18860 0.2943 1.0620 -#> -#> Parameter correlation: -#> parent_0 log_k_parent log_k_M1 log_k_M2 f_parent_ilr_1 -#> parent_0 1.00000 0.40213 -0.1693 0.02912 -0.4726 -#> log_k_parent 0.40213 1.00000 -0.4210 0.07241 -0.5837 -#> log_k_M1 -0.16931 -0.42103 1.0000 -0.37657 0.7438 -#> log_k_M2 0.02912 0.07241 -0.3766 1.00000 -0.2518 -#> f_parent_ilr_1 -0.47263 -0.58367 0.7438 -0.25177 1.0000 -#> f_M1_ilr_1 0.17148 0.42643 -0.8054 0.52647 -0.8602 -#> f_M1_ilr_1 -#> parent_0 0.1715 -#> log_k_parent 0.4264 -#> log_k_M1 -0.8054 -#> log_k_M2 0.5265 -#> f_parent_ilr_1 -0.8602 -#> f_M1_ilr_1 1.0000 -#> -#> Residual standard error: 2.471 on 33 degrees of freedom -#> -#> Backtransformed parameters: -#> Confidence intervals for internally transformed parameters are asymmetric. -#> t-test (unrealistically) based on the assumption of normal distribution -#> for estimators of untransformed parameters. -#> Estimate t value Pr(>t) Lower Upper -#> parent_0 102.10000 59.55 1.815e-35 98.58000 105.5000 -#> k_parent 0.73930 23.29 2.337e-22 0.67750 0.8068 -#> k_M1 0.29920 13.16 5.552e-15 0.25630 0.3492 -#> k_M2 0.02023 14.38 4.497e-16 0.01756 0.0233 -#> f_parent_to_M1 0.76870 16.90 4.093e-18 0.66380 0.8483 -#> f_M1_to_M2 0.72290 13.53 2.557e-15 0.60260 0.8178 -#> -#> Chi2 error levels in percent: -#> err.min n.optim df -#> All data 8.454 6 17 -#> parent 8.660 2 6 -#> M1 10.583 2 5 -#> M2 3.586 2 6 -#> -#> Resulting formation fractions: -#> ff -#> parent_M1 0.7687 -#> parent_sink 0.2313 -#> M1_M2 0.7229 -#> M1_sink 0.2771 -#> -#> Estimated disappearance times: -#> DT50 DT90 -#> parent 0.9376 3.114 -#> M1 2.3170 7.697 -#> M2 34.2689 113.839 -#> -#> Data: -#> time variable observed predicted residual -#> 0 parent 101.5 1.021e+02 -0.56248 -#> 0 parent 101.2 1.021e+02 -0.86248 -#> 1 parent 53.9 4.873e+01 5.17118 -#> 1 parent 47.5 4.873e+01 -1.22882 -#> 3 parent 10.4 1.111e+01 -0.70773 -#> 3 parent 7.6 1.111e+01 -3.50773 -#> 7 parent 1.1 5.772e-01 0.52283 -#> 7 parent 0.3 5.772e-01 -0.27717 -#> 14 parent NA 3.264e-03 NA -#> 14 parent 3.5 3.264e-03 3.49674 -#> 28 parent NA 1.045e-07 NA -#> 28 parent 3.2 1.045e-07 3.20000 -#> 60 parent NA -1.054e-10 NA -#> 60 parent NA -1.054e-10 NA -#> 90 parent 0.6 -1.875e-11 0.60000 -#> 90 parent NA -1.875e-11 NA -#> 120 parent NA -2.805e-11 NA -#> 120 parent 3.5 -2.805e-11 3.50000 -#> 0 M1 NA 0.000e+00 NA -#> 0 M1 NA 0.000e+00 NA -#> 1 M1 36.4 3.479e+01 1.61088 -#> 1 M1 37.4 3.479e+01 2.61088 -#> 3 M1 34.3 3.937e+01 -5.07027 -#> 3 M1 39.8 3.937e+01 0.42973 -#> 7 M1 15.1 1.549e+01 -0.38715 -#> 7 M1 17.8 1.549e+01 2.31285 -#> 14 M1 5.8 1.995e+00 3.80469 -#> 14 M1 1.2 1.995e+00 -0.79531 -#> 28 M1 NA 3.034e-02 NA -#> 28 M1 NA 3.034e-02 NA -#> 60 M1 0.5 2.111e-06 0.50000 -#> 60 M1 NA 2.111e-06 NA -#> 90 M1 NA 2.913e-10 NA -#> 90 M1 3.2 2.913e-10 3.20000 -#> 120 M1 1.5 3.625e-11 1.50000 -#> 120 M1 0.6 3.625e-11 0.60000 -#> 0 M2 NA 0.000e+00 NA -#> 0 M2 NA 0.000e+00 NA -#> 1 M2 NA 4.455e+00 NA -#> 1 M2 4.8 4.455e+00 0.34517 -#> 3 M2 20.9 2.153e+01 -0.62527 -#> 3 M2 19.3 2.153e+01 -2.22527 -#> 7 M2 42.0 4.192e+01 0.07941 -#> 7 M2 43.1 4.192e+01 1.17941 -#> 14 M2 49.4 4.557e+01 3.83353 -#> 14 M2 44.3 4.557e+01 -1.26647 -#> 28 M2 34.6 3.547e+01 -0.87275 -#> 28 M2 33.0 3.547e+01 -2.47275 -#> 60 M2 18.8 1.858e+01 0.21837 -#> 60 M2 17.6 1.858e+01 -0.98163 -#> 90 M2 10.6 1.013e+01 0.47130 -#> 90 M2 10.8 1.013e+01 0.67130 -#> 120 M2 9.8 5.521e+00 4.27893 -#> 120 M2 3.3 5.521e+00 -2.22107
    -
    +# The data have been generated using the following kinetic models +m_synth_SFO_lin <- mkinmod(parent = list(type = "SFO", to = "M1"), + M1 = list(type = "SFO", to = "M2"), + M2 = list(type = "SFO"), use_of_ff = "max") + + +m_synth_SFO_par <- mkinmod(parent = list(type = "SFO", to = c("M1", "M2"), + sink = FALSE), + M1 = list(type = "SFO"), + M2 = list(type = "SFO"), use_of_ff = "max") + +m_synth_DFOP_lin <- mkinmod(parent = list(type = "DFOP", to = "M1"), + M1 = list(type = "SFO", to = "M2"), + M2 = list(type = "SFO"), use_of_ff = "max") + +m_synth_DFOP_par <- mkinmod(parent = list(type = "DFOP", to = c("M1", "M2"), + sink = FALSE), + M1 = list(type = "SFO"), + M2 = list(type = "SFO"), use_of_ff = "max") + +# The model predictions without intentional error were generated as follows +sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) + +d_synth_SFO_lin <- mkinpredict(m_synth_SFO_lin, + c(k_parent = 0.7, f_parent_to_M1 = 0.8, + k_M1 = 0.3, f_M1_to_M2 = 0.7, + k_M2 = 0.02), + c(parent = 100, M1 = 0, M2 = 0), + sampling_times) + +d_synth_DFOP_lin <- mkinpredict(m_synth_DFOP_lin, + c(k1 = 0.2, k2 = 0.02, g = 0.5, + f_parent_to_M1 = 0.5, k_M1 = 0.3, + f_M1_to_M2 = 0.7, k_M2 = 0.02), + c(parent = 100, M1 = 0, M2 = 0), + sampling_times) + +d_synth_SFO_par <- mkinpredict(m_synth_SFO_par, + c(k_parent = 0.2, + f_parent_to_M1 = 0.8, k_M1 = 0.01, + f_parent_to_M2 = 0.2, k_M2 = 0.02), + c(parent = 100, M1 = 0, M2 = 0), + sampling_times) + +d_synth_DFOP_par <- mkinpredict(m_synth_DFOP_par, + c(k1 = 0.3, k2 = 0.02, g = 0.7, + f_parent_to_M1 = 0.6, k_M1 = 0.04, + f_parent_to_M2 = 0.4, k_M2 = 0.01), + c(parent = 100, M1 = 0, M2 = 0), + sampling_times) + +# Construct names for datasets with errors +d_synth_names = paste0("d_synth_", c("SFO_lin", "SFO_par", + "DFOP_lin", "DFOP_par")) + +# Function for adding errors. The add_err function now published with this +# package is a slightly generalised version where the names of secondary +# compartments that should have an initial value of zero (M1 and M2 in this +# case) are not hardcoded any more. +add_err = function(d, sdfunc, LOD = 0.1, reps = 2, seed = 123456789) +{ + set.seed(seed) + d_long = mkin_wide_to_long(d, time = "time") + d_rep = data.frame(lapply(d_long, rep, each = 2)) + d_rep$value = rnorm(length(d_rep$value), d_rep$value, sdfunc(d_rep$value)) + + d_rep[d_rep$time == 0 & d_rep$name + d_NA <- transform(d_rep, value = ifelse(value < LOD, NA, value)) + d_NA$value <- round(d_NA$value, 1) + return(d_NA) +} + +# The following is the two-component model of Rocke and Lorenzato (1995) +sdfunc_twocomp = function(value, sd_low, rsd_high) { + sqrt(sd_low^2 + value^2 * rsd_high^2) +} + +# Add the errors. +for (d_synth_name in d_synth_names) +{ + d_synth = get(d_synth_name) + assign(paste0(d_synth_name, "_a"), add_err(d_synth, function(value) 3)) + assign(paste0(d_synth_name, "_b"), add_err(d_synth, function(value) 7)) + assign(paste0(d_synth_name, "_c"), add_err(d_synth, + function(value) sdfunc_twocomp(value, 0.5, 0.07))) + +} + +d_synth_err_names = c( + paste(rep(d_synth_names, each = 3), letters[1:3], sep = "_") +) + +# This is just one example of an evaluation using the kinetic model used for +# the generation of the data +fit <- mkinfit(m_synth_SFO_lin, synthetic_data_for_UBA_2014[[1]]$data, + quiet = TRUE) +plot_sep(fit) +summary(fit) + +
    #> Error: <text>:68:43: Unerwartete(s) SPECIAL +#> 67: +#> 68: d_rep[d_rep$time == 0 & d_rep$name <!-- %in% +#> ^