From 280d36230052de4f94e384648c1283031fbc9840 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Tue, 17 Jul 2018 17:29:14 +0200 Subject: Fix inverse predictions for replicate measurements For details, see NEWS.md --- .Rbuildignore | 13 +- DESCRIPTION | 4 +- GNUmakefile | 35 +++- NEWS.md | 14 +- TODO | 5 - _pkgdown.yml | 9 - build.log | 11 ++ data/rl95_toluene.rda | Bin 0 -> 328 bytes docs/articles/chemCal.html | 209 +++++++++++++++++++++ .../figure-html/unnamed-chunk-1-1.png | Bin 0 -> 92097 bytes .../figure-html/unnamed-chunk-2-1.png | Bin 0 -> 54434 bytes .../figure-html/unnamed-chunk-3-1.png | Bin 0 -> 54434 bytes docs/articles/index.html | 54 ++++-- docs/authors.html | 15 +- docs/index.html | 17 +- docs/news/index.html | 153 +++++++++++++++ docs/pkgdown.yml | 3 +- docs/reference/calplot.lm.html | 19 +- docs/reference/chemCal-package.html | 15 +- docs/reference/din32645.html | 58 +++--- docs/reference/index.html | 27 ++- docs/reference/inverse.predict.html | 61 +++++- docs/reference/lod.html | 26 ++- docs/reference/loq.html | 31 +-- docs/reference/massart97ex1.html | 15 +- docs/reference/massart97ex3.html | 63 ++++--- docs/reference/rl95_toluene.html | 153 +++++++++++++++ docs/reference/utstats14.html | 154 +++++++++++++++ man/din32645.Rd | 13 +- man/inverse.predict.Rd | 23 ++- man/lod.Rd | 1 - man/loq.Rd | 4 +- man/massart97ex3.Rd | 31 +-- man/rl95_toluene.Rd | 16 ++ tests/testthat/test_compare_investr.R | 30 +++ tests/testthat/test_inverse.predict.R | 23 ++- tests/testthat/test_lod_loq.R | 6 +- tests/testthat/test_massart.R | 22 ++- vignettes/chemCal.R | 39 ++-- vignettes/chemCal.Rmd | 97 ++++++---- vignettes/chemCal.html | 57 ++++-- vignettes/figure/unnamed-chunk-1-1.pdf | Bin 12114 -> 0 bytes vignettes/figure/unnamed-chunk-2-1.pdf | Bin 5027 -> 0 bytes vignettes/references.bib | 35 ++++ vignettes/refs.bib | 7 - 45 files changed, 1272 insertions(+), 296 deletions(-) delete mode 100644 TODO create mode 100644 build.log create mode 100644 data/rl95_toluene.rda create mode 100644 docs/articles/chemCal.html create mode 100644 docs/articles/chemCal_files/figure-html/unnamed-chunk-1-1.png create mode 100644 docs/articles/chemCal_files/figure-html/unnamed-chunk-2-1.png create mode 100644 docs/articles/chemCal_files/figure-html/unnamed-chunk-3-1.png create mode 100644 docs/news/index.html create mode 100644 docs/reference/rl95_toluene.html create mode 100644 docs/reference/utstats14.html create mode 100644 man/rl95_toluene.Rd create mode 100644 tests/testthat/test_compare_investr.R delete mode 100644 vignettes/figure/unnamed-chunk-1-1.pdf delete mode 100644 vignettes/figure/unnamed-chunk-2-1.pdf create mode 100644 vignettes/references.bib delete mode 100644 vignettes/refs.bib diff --git a/.Rbuildignore b/.Rbuildignore index c7ccaca..78e037c 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,11 +1,6 @@ GNUmakefile -out$ -toc$ -bbl$ -blg$ -aux$ -log$ -vignettes/chemCal.tex -vignettes/chemCal.pdf -vignettes/chemCal.R +build.log$ chemCal_.*.tar.gz +docs +_pkgdown.yml +README.html diff --git a/DESCRIPTION b/DESCRIPTION index 7610ff0..2e1f741 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,11 +1,11 @@ Package: chemCal Version: 0.2.1 -Date: 2018-07-05 +Date: 2018-07-17 Title: Calibration Functions for Analytical Chemistry Authors@R: c(person("Johannes", "Ranke", role = c("aut", "cre", "cph"), email = "jranke@uni-bremen.de", comment = c(ORCID = "0000-0003-4371-6538"))) -Suggests: MASS, knitr, testthat +Suggests: MASS, knitr, testthat, investr Description: Simple functions for plotting linear calibration functions and estimating standard errors for measurements according to the Handbook of Chemometrics and Qualimetrics: Part A diff --git a/GNUmakefile b/GNUmakefile index 3f16fbc..7992c5e 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -8,27 +8,44 @@ TGZ := $(PKGNAME)_$(PKGVERS).tar.gz # If no alternate bin folder is specified, the default is to use the folder # containing the first instance of R on the PATH. RBIN ?= $(shell dirname "`which R`") +# +# Vignettes are listed in the build target +pkgfiles = \ + GNUmakefile \ + .Rbuildignore \ + data/* \ + DESCRIPTION \ + man/* \ + NAMESPACE \ + NEWS.md \ + README.html \ + R/* \ + tests/* \ + tests/testthat* + +all: build + +$(TGZ): $(pkgfiles) vignettes + "$(RBIN)/R" CMD build . 2>&1 | tee build.log + +build: $(TGZ) README.html: README.md "$(RBIN)/Rscript" -e "rmarkdown::render('README.md', output_format = 'html_document')" -build: - "$(RBIN)/R" CMD build . - install: build "$(RBIN)/R" CMD INSTALL $(TGZ) check: build "$(RBIN)/R" CMD check --as-cran $(TGZ) -vignettes/%.html: vignettes/%.Rmd - "$(RBIN)/Rscript" -e "tools::buildVignette(file = 'vignettes/$*.Rnw', dir = 'vignettes')" +vignettes/%.html: vignettes/%.Rmd vignettes/refs.bib + "$(RBIN)/Rscript" -e "tools::buildVignette(file = 'vignettes/$*.Rmd', dir = 'vignettes')" vignettes: vignettes/chemCal.html pd: "$(RBIN)/Rscript" -e "pkgdown::build_site()" - cp vignettes/chemCal.pdf docs/articles git add -A git commit -m 'Static documentation rebuilt by pkgdown::build_site()' -e @@ -40,3 +57,9 @@ winbuilder: build test: install NOT_CRAN=true "$(RBIN)/Rscript" -e 'devtools::test()' + +clean: + $(RM) -r vignettes/*_cache + $(RM) -r vignettes/*_files + $(RM) -r vignettes/*.R + $(RM) Rplots.pdf diff --git a/NEWS.md b/NEWS.md index 43a9d25..2c34f05 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,12 +1,16 @@ -# chemCal 0.2.1 (2018-07-16) +# chemCal 0.2.1 (2018-07-17) -- 'lod' and 'loq': In the lists that are returned, return the list component 'y' without names, because we always have a single element in 'y' and the name '1' is meaningless +- 'inverse.predict': Do not work on the means of the calibration standards any more, as this ignores the variability of y values about the means. Thanks to Anna Burniol Figols for pointing out this issue -- Use testthat for tests to simplify further development +- Use testthat for tests to simplify development. Adapt the tests using data with replicate standard measurements to work on the means in order to show the relation to 'inverse.predict' from earlier versions. Include comparisons with investr::calibrate(method = 'Wald') for unweighted regressions. Include tests with more precision to check for changes in numerical output across versions. -- Convert vignette to html +- 'lod' and 'loq': In the lists that are returned, return the list component 'y' without names, because we always only have a single element in 'y' (previously the name '1' was returned). -- Add another example dataset, from an online course at the University of Toronto +- Convert vignette to html and explain the changes to 'inverse.predict' + +- Add two example dataset, one from an online course at the University of Toronto, one from Rocke and Lorenzato (1995) + +- Update static documentation # chemCal 0.1-33 (2014-04-24) diff --git a/TODO b/TODO deleted file mode 100644 index 1fc44a7..0000000 --- a/TODO +++ /dev/null @@ -1,5 +0,0 @@ -- lod(): At the moment, it is not possible to calculate an lod for the - case of more than one replicates (m is fixed to 1). The formula - for the prediction of y from mean(x) in Massart et al, p. 433 - could be used to generalize this. -- Write methods for nonlinear calibration functions diff --git a/_pkgdown.yml b/_pkgdown.yml index edc41e2..f5eeff8 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -2,12 +2,3 @@ title: chemCal template: bootswatch: spacelab - -navbar: - title: ~ - type: default - left: - - text: Functions and data - href: reference/index.html - - text: Short manual (pdf) - href: articles/chemCal.pdf diff --git a/build.log b/build.log new file mode 100644 index 0000000..8e55d71 --- /dev/null +++ b/build.log @@ -0,0 +1,11 @@ +* checking for file ‘./DESCRIPTION’ ... OK +* preparing ‘chemCal’: +* checking DESCRIPTION meta-information ... OK +* installing the package to build vignettes +* creating vignettes ... OK +* checking for LF line-endings in source and make files and shell scripts +* checking for empty or unneeded directories +* looking to see if a ‘data/datalist’ file should be added +* re-saving image files +* building ‘chemCal_0.2.1.tar.gz’ + diff --git a/data/rl95_toluene.rda b/data/rl95_toluene.rda new file mode 100644 index 0000000..4b213be Binary files /dev/null and b/data/rl95_toluene.rda differ diff --git a/docs/articles/chemCal.html b/docs/articles/chemCal.html new file mode 100644 index 0000000..29db7c8 --- /dev/null +++ b/docs/articles/chemCal.html @@ -0,0 +1,209 @@ + + + + + + + +Introduction to chemCal • chemCal + + + + + + + + + +
+
+ + + +
+
+ + + + +
+

+Basic calibration functions

+

The chemCal package was first designed in the course of a lecture and lab course on “Analytics of Organic Trace Contaminants” at the University of Bremen from October to December 2004. In the fall 2005, an email exchange with Ron Wehrens led to the belief that it would be desirable to implement the inverse prediction method given in Massart et al. (1997) since it also covers the case of weighted regression. Studies of the IUPAC orange book and of DIN 32645 (equivalent to ISO 11843), publications by Currie (1997) and the Analytical Method Committee of the Royal Society of Chemistry (Analytical Methods Committee 1989) and a nice paper by Castells and Castillo (Castells and Castillo 2000) provided some further understanding of the matter.

+

At the moment, the package consists of four functions (calplot, lod, loq and inverse.predict), working on univariate linear models of class lm or rlm, plus several datasets for validation.

+

A bug report and the following e-mail exchange on the r-devel mailing list about prediction intervals from weighted regression entailed some further studies on this subject. However, I did not encounter any proof or explanation of the formula cited below yet, so I can’t really confirm that Massart’s method is correct.

+

In fact, in June 2018 I was made aware of the fact that the inverse prediction method implemented in chemCal version 0.1.37 and before did not take the variance of replicate calibration standards about their means into account, nor the number of replicates when calculating the degrees of freedom. Thanks to PhD student Anna Burniol Figols for reporting this issue!

+

As a consequence, I rewrote inverse.predict not to automatically work with the mean responses for each calibration standard any more. The example calculations from Massart et al. (1997) can still be reproduced when the regression model is calculated using the means of the calibration data as shown below.

+
+
+

+Usage

+

When calibrating an analytical method, the first task is to generate a suitable model. If we want to use the chemCal functions, we have to restrict ourselves to univariate, possibly weighted, linear regression so far.

+

Once such a model has been created, the calibration can be graphically shown by using the calplot function:

+
library(chemCal)
+m0 <- lm(y ~ x, data = massart97ex3)
+calplot(m0)
+

+

As we can see, the scatter increases with increasing x. This is also illustrated by one of the diagnostic plots for linear models provided by R:

+
plot(m0, which=3)
+

+

Therefore, in Example 8 in Massart et al. (1997), weighted regression is proposed which can be reproduced by the following code. Note that we are building the model on the mean values for each standard in order to be able to reproduce the results given in the book with the current version of chemCal.

+
weights <- with(massart97ex3, {
+  yx <- split(y, x)
+  ybar <- sapply(yx, mean)
+  s <- round(sapply(yx, sd), digits = 2)
+  w <- round(1 / (s^2), digits = 3)
+})
+massart97ex3.means <- aggregate(y ~ x, massart97ex3, mean)
+
+m <- lm(y ~ x, w = weights, data = massart97ex3.means)
+

If we now want to predict a new x value from measured y values, we use the inverse.predict function:

+
inverse.predict(m, 15, ws=1.67)
+
## $Prediction
+## [1] 5.865367
+## 
+## $`Standard Error`
+## [1] 0.8926109
+## 
+## $Confidence
+## [1] 2.478285
+## 
+## $`Confidence Limits`
+## [1] 3.387082 8.343652
+
inverse.predict(m, 90, ws = 0.145)
+
## $Prediction
+## [1] 44.06025
+## 
+## $`Standard Error`
+## [1] 2.829162
+## 
+## $Confidence
+## [1] 7.855012
+## 
+## $`Confidence Limits`
+## [1] 36.20523 51.91526
+

The weight ws assigned to the measured y value has to be given by the user in the case of weighted regression, or alternatively, the approximate variance var.s at this location.

+
+
+

+Background for inverse.predict +

+

Equation 8.28 in Massart et al. (1997) gives a general equation for predicting the standard error \(s_{\hat{x_s}}\) for an \(x\) value predicted from measurements of \(y\) according to the linear calibration function \(y = b_0 + b_1 \cdot x\):

+\[\begin{equation} +s_{\hat{x_s}} = \frac{s_e}{b_1} \sqrt{\frac{1}{w_s m} + \frac{1}{\sum{w_i}} + + \frac{(\bar{y_s} - \bar{y_w})^2 \sum{w_i}} + {{b_1}^2 \left( \sum{w_i} \sum{w_i {x_i}^2} - + {\left( \sum{ w_i x_i } \right)}^2 \right) }} +\end{equation}\] +

with

+\[\begin{equation} +s_e = \sqrt{ \frac{\sum w_i (y_i - \hat{y_i})^2}{n - 2}} +\end{equation}\] +

In chemCal version before 0.2, I interpreted \(w_i\) to be the weight for calibration standard \(i\), \(y_i\) to be the mean value observed for standard \(i\), and \(n\) to be the number of calibration standards. With this implementation I was able to reproduce the examples given in the book. However, as noted above, I was made aware of the fact that this way of calculation does not take the variation of the y values about the means into account. Furthermore, I noticed that for the case of unweighted linear calibration with replicate standards, inverse.predict produced different results than calibrate from the investr package when using the Wald method.

+

Both issues are now addressed in chemCal starting from version 0.2.1. Here, \(y_i\) is calibration measurement \(i\), \(\hat{y_i}\) is the estimated value for calibration measurement \(i\) and \(n\) is the total number of calibration measurements.

+

\(w_s\) is the weight attributed to the sample \(s\), \(m\) is the number of replicate measurements of sample \(s\), \(\bar{y_s}\) is the mean response for the sample, \(\bar{y_w} = \frac{\sum{w_i y_i}}{\sum{w_i}}\) is the weighted mean of responses \(y_i\), and \(x_i\) is the given \(x\) value for standard \(i\).

+

The weight \(w_s\) for the sample should be estimated or calculated in accordance to the weights used in the linear regression.

+

I had also adjusted the above equation in order to be able to take a different precisions in standards and samples into account. In analogy to Equation 8.26 from I am using

+\[\begin{equation} +s_{\hat{x_s}} = \frac{1}{b_1} \sqrt{\frac{{s_s}^2}{w_s m} + + {s_e}^2 \left( \frac{1}{\sum{w_i}} + + \frac{(\bar{y_s} - \bar{y_w})^2 \sum{w_i}} + {{b_1}^2 \left( \sum{w_i} \sum{w_i {x_i}^2} - {\left( \sum{ w_i x_i } \right)}^2 \right) } \right) } +\end{equation}\] +

where I interpret \(\frac{{s_s}^2}{w_s}\) as an estimator of the variance at location \(\hat{x_s}\), which can be replaced by a user-specified value using the argument var.s of the function inverse.predict.

+
+
+

Analytical Methods Committee. 1989. “Robust Statistics — How Not to Reject Outliers. Part 1. Basic Concepts.” The Analyst 114: 1693–7.

+
+
+

Castells, Reynaldo César, and Marcela Alejandra Castillo. 2000. “Systematic Errors: Detection and Correction by Means of Standard Calibration, Youden Calibration and Standard Additions Method in Conjunction with a Method Response Model.” Analytica Chimica Acta 423: 179–85.

+
+
+

Currie, L. A. 1997. “Nomenclature in Evaluation of Analytical Methods Including Detection and Quantification Capabilities (IUPAC Recommendations 1995).” Analytica Chimica Acta 391: 105–26.

+
+
+

Massart, D. L, B. G. M. Vandeginste, L. M. C. Buydens, S. De Jong, P. J. Lewi, and J Smeyers-Verbeke. 1997. Handbook of Chemometrics and Qualimetrics: Part A. Amsterdam: Elsevier.

+
+
+
+
+ + + +
+ + +
+ +
+

Site built with pkgdown.

+
+ +
+
+ + + + + diff --git a/docs/articles/chemCal_files/figure-html/unnamed-chunk-1-1.png b/docs/articles/chemCal_files/figure-html/unnamed-chunk-1-1.png new file mode 100644 index 0000000..62c95ca Binary files /dev/null and b/docs/articles/chemCal_files/figure-html/unnamed-chunk-1-1.png differ diff --git a/docs/articles/chemCal_files/figure-html/unnamed-chunk-2-1.png b/docs/articles/chemCal_files/figure-html/unnamed-chunk-2-1.png new file mode 100644 index 0000000..464048c Binary files /dev/null and b/docs/articles/chemCal_files/figure-html/unnamed-chunk-2-1.png differ diff --git a/docs/articles/chemCal_files/figure-html/unnamed-chunk-3-1.png b/docs/articles/chemCal_files/figure-html/unnamed-chunk-3-1.png new file mode 100644 index 0000000..464048c Binary files /dev/null and b/docs/articles/chemCal_files/figure-html/unnamed-chunk-3-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 2e993ac..a29b7c6 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -18,23 +18,35 @@ + + + + + - + + + + + + - + + + -
+
@@ -66,17 +92,18 @@ - -
-
+
+ +

All vignettes

@@ -88,11 +115,14 @@
-

Site built with pkgdown.

+

Site built with pkgdown.

+ + + diff --git a/docs/authors.html b/docs/authors.html index c9890cd..ea55846 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -58,17 +58,26 @@ chemCal - 0.1.37.9001 + 0.2.1
diff --git a/docs/news/index.html b/docs/news/index.html new file mode 100644 index 0000000..62ab315 --- /dev/null +++ b/docs/news/index.html @@ -0,0 +1,153 @@ + + + + + + + + +Changelog • chemCal + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+ + +
+

+chemCal 0.2.1 (2018-07-17) Unreleased +

+
    +
  • ‘inverse.predict’: Do not work on the means of the calibration standards any more, as this ignores the variability of y values about the means

  • +
  • Use testthat for tests to simplify further development. Adapt the tests using data with replicate standard measurements to work on the means in order to show the relation to ‘inverse.predict’ from earlier versions. Include comparisons with investr::calibrate(method = ‘Wald’) for unweighted regressions. Include tests with more precision to check for changes in numerical output across versions.

  • +
  • ‘lod’ and ‘loq’: In the lists that are returned, return the list component ‘y’ without names, because we always only have a single element in ‘y’ (previously the name ‘1’ was returned).

  • +
  • Convert vignette to html and explain the changes to ‘inverse.predict’

  • +
  • Add two example dataset, one from an online course at the University of Toronto, one from Rocke and Lorenzato (1995)

  • +
+
+
+

+chemCal 0.1-33 (2014-04-24) Unreleased +

+
    +
  • Bugfix in lod() and loq(): In the case of small absolute x values (e.g. on the order of 1e-4 and smaller), the lod or loq calculated using the default method could produce inaccurate results as the default tolerance that was used in the internal call to optimize is inappropriate in such cases. Now a reasonable default is used which can be overriden by the user. Thanks to Jérôme Ambroise for reporting the bug.
  • +
+

For a detailed list of changes to the chemCal source please consult the commit history on https://cgit.jrwb.de/chemCal

+
+
+ + + +
+ +
+ + +
+

Site built with pkgdown.

+
+ +
+
+ + + + + + diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 6e67788..3b5e371 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,5 +1,6 @@ pandoc: 1.19.2.4 pkgdown: 1.1.0 pkgdown_sha: ~ -articles: [] +articles: + chemCal: chemCal.html diff --git a/docs/reference/calplot.lm.html b/docs/reference/calplot.lm.html index 7eda066..a5b3bbb 100644 --- a/docs/reference/calplot.lm.html +++ b/docs/reference/calplot.lm.html @@ -62,17 +62,26 @@ chemCal - 0.1.37.9001 + 0.2.1
+# For reproducing the results for replicate standard measurements in example 8, +# we need to do the calibration on the means when using chemCal > 0.2 +weights <- with(massart97ex3, { + yx <- split(y, x) + ybar <- sapply(yx, mean) + s <- round(sapply(yx, sd), digits = 2) + w <- round(1 / (s^2), digits = 3) +}) + +massart97ex3.means <- aggregate(y ~ x, massart97ex3, mean) + +m3.means <- lm(y ~ x, w = weights, data = massart97ex3.means) + +inverse.predict(m3.means, 15, ws = 1.67) # 5.9 +- 2.5
#> $Prediction +#> [1] 5.865367 +#> +#> $`Standard Error` +#> [1] 0.8926109 +#> +#> $Confidence +#> [1] 2.478285 +#> +#> $`Confidence Limits` +#> [1] 3.387082 8.343652 +#>
inverse.predict(m3.means, 90, ws = 0.145) # 44.1 +- 7.9
#> $Prediction +#> [1] 44.06025 +#> +#> $`Standard Error` +#> [1] 2.829162 +#> +#> $Confidence +#> [1] 7.855012 +#> +#> $`Confidence Limits` +#> [1] 36.20523 51.91526 +#>
+