From 9e0dae397df8c18e7333d2604cae96b2a7927420 Mon Sep 17 00:00:00 2001 From: ranke Date: Fri, 23 Jun 2006 15:33:27 +0000 Subject: - inverse.predict now has a var.s argument instead of the never tested ss argument. This is documented in the updated vignette - loq() now has w.loq and var.loq arguments, and stops with a message if neither are specified and the model has weights. - calplot doesn't stop any more for weighted regression models, but only refrains from drawing prediction bands - Added method = "din" to lod(), now that I actually have it (DIN 32645) and was able to see which approximation is used therein. - removed the demos, as the examples and tests are already partially duplicated - The vignette is more of a collection of various notes, but should certainly be helpful for the user. - Version bump to 0.1-xxx git-svn-id: http://kriemhild.uft.uni-bremen.de/svn/chemCal@16 5fad18fb-23f0-0310-ab10-e59a3bee62b4 --- DESCRIPTION | 4 +-- R/calplot.R | 23 ++++++++-------- R/inverse.predict.lm.R | 35 ++++++++++++----------- R/lod.R | 40 +++++++++++++++------------ R/loq.R | 18 +++++++++--- demo/00Index | 1 - demo/massart97ex8.R | 12 -------- inst/doc/Makefile | 2 +- inst/doc/chemCal-001.pdf | 4 +-- inst/doc/chemCal-002.pdf | 4 +-- inst/doc/chemCal.Rnw | 59 ++++++++++++++++++++++++--------------- inst/doc/chemCal.aux | 1 + inst/doc/chemCal.log | 69 ++++++++++++++++++++++------------------------ inst/doc/chemCal.pdf | Bin 133895 -> 123681 bytes inst/doc/chemCal.tex | 47 ++++++++++++++++++++----------- man/calplot.lm.Rd | 12 ++++---- man/din32645.Rd | 45 ++++++++++++++++-------------- man/inverse.predict.Rd | 25 +++++++++-------- man/lod.Rd | 20 ++++++++++---- man/loq.Rd | 58 ++++++++++++++++++-------------------- man/massart97ex3.Rd | 59 +++++++++++++++++++++------------------ tests/din32645.R | 6 ++-- tests/din32645.Rout.save | 10 ++++--- tests/massart97.R | 25 +++++++++++------ tests/massart97.Rout.save | 47 ++++++++++++++++++++++++------- 25 files changed, 355 insertions(+), 271 deletions(-) delete mode 100644 demo/00Index delete mode 100644 demo/massart97ex8.R diff --git a/DESCRIPTION b/DESCRIPTION index b42a9e5..2ac2a63 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: chemCal -Version: 0.05-14 -Date: 2006-05-23 +Version: 0.1-16 +Date: 2006-06-23 Title: Calibration functions for analytical chemistry Author: Johannes Ranke Maintainer: Johannes Ranke diff --git a/R/calplot.R b/R/calplot.R index 0cc6120..6aed9c0 100644 --- a/R/calplot.R +++ b/R/calplot.R @@ -1,6 +1,6 @@ calplot <- function(object, - xlim = c("auto","auto"), ylim = c("auto","auto"), - xlab = "Concentration", ylab = "Response", alpha=0.05, + xlim = c("auto", "auto"), ylim = c("auto", "auto"), + xlab = "Concentration", ylab = "Response", alpha = 0.05, varfunc = NULL) { UseMethod("calplot") @@ -22,14 +22,6 @@ calplot.lm <- function(object, if (length(object$coef) > 2) stop("More than one independent variable in your model - not implemented") - if (length(object$weights) > 0) { - stop(paste( - "\nConfidence and prediction intervals for weighted linear models require", - "weights for the x values from which the predictions are to be generated.", - "This is not supported by the internally used predict.lm method.", - sep = "\n")) - } - if (alpha <= 0 | alpha >= 1) stop("Alpha should be between 0 and 1 (exclusive)") @@ -65,9 +57,17 @@ calplot.lm <- function(object, points(x,y, pch = 21, bg = "yellow") matlines(newdata[[1]], pred.lim, lty = c(1, 4, 4), col = c("black", "red", "red")) + if (length(object$weights) > 
0) { + legend(min(x), + max(pred.lim, na.rm = TRUE), + legend = c("Fitted Line", "Confidence Bands"), + lty = c(1, 3), + lwd = 2, + col = c("black", "green4"), + horiz = FALSE, cex = 0.9, bg = "gray95") + } else { matlines(newdata[[1]], conf.lim, lty = c(1, 3, 3), col = c("black", "green4", "green4")) - legend(min(x), max(pred.lim, na.rm = TRUE), legend = c("Fitted Line", "Confidence Bands", @@ -76,4 +76,5 @@ calplot.lm <- function(object, lwd = 2, col = c("black", "green4", "red"), horiz = FALSE, cex = 0.9, bg = "gray95") + } } diff --git a/R/inverse.predict.lm.R b/R/inverse.predict.lm.R index 8352c26..927e672 100644 --- a/R/inverse.predict.lm.R +++ b/R/inverse.predict.lm.R @@ -1,21 +1,21 @@ # This is an implementation of Equation (8.28) in the Handbook of Chemometrics # and Qualimetrics, Part A, Massart et al (1997), page 200, validated with -# Example 8 on the same page +# Example 8 on the same page, extended as specified in the package vignette inverse.predict <- function(object, newdata, ..., - ws = "auto", alpha = 0.05, ss = "auto") + ws = "auto", alpha = 0.05, var.s = "auto") { UseMethod("inverse.predict") } inverse.predict.default <- function(object, newdata, ..., - ws = "auto", alpha = 0.05, ss = "auto") + ws = "auto", alpha = 0.05, var.s = "auto") { stop("Inverse prediction only implemented for univariate lm objects.") } inverse.predict.lm <- function(object, newdata, ..., - ws = "auto", alpha = 0.05, ss = "auto") + ws = "auto", alpha = 0.05, var.s = "auto") { yname = names(object$model)[[1]] xname = names(object$model)[[2]] @@ -29,11 +29,11 @@ inverse.predict.lm <- function(object, newdata, ..., w <- rep(1,length(split(object$model[[yname]],object$model[[xname]]))) } .inverse.predict(object = object, newdata = newdata, - ws = ws, alpha = alpha, ss = ss, w = w, xname = xname, yname = yname) + ws = ws, alpha = alpha, var.s = var.s, w = w, xname = xname, yname = yname) } inverse.predict.rlm <- function(object, newdata, ..., - ws = "auto", alpha = 0.05, ss = "auto") + ws = "auto", alpha = 0.05, var.s = "auto") { yname = names(object$model)[[1]] xname = names(object$model)[[2]] @@ -43,10 +43,10 @@ inverse.predict.rlm <- function(object, newdata, ..., wx <- split(object$weights,object$model[[xname]]) w <- sapply(wx,mean) .inverse.predict(object = object, newdata = newdata, - ws = ws, alpha = alpha, ss = ss, w = w, xname = xname, yname = yname) + ws = ws, alpha = alpha, var.s = var.s, w = w, xname = xname, yname = yname) } -.inverse.predict <- function(object, newdata, ws, alpha, ss, w, xname, yname) +.inverse.predict <- function(object, newdata, ws, alpha, var.s, w, xname, yname) { if (length(object$coef) > 2) stop("More than one independent variable in your model - not implemented") @@ -57,27 +57,26 @@ inverse.predict.rlm <- function(object, newdata, ..., ybars <- mean(newdata) m <- length(newdata) - yx <- split(object$model[[yname]],object$model[[xname]]) + yx <- split(object$model[[yname]], object$model[[xname]]) n <- length(yx) df <- n - length(object$coef) x <- as.numeric(names(yx)) ybar <- sapply(yx,mean) - yhatx <- split(object$fitted.values,object$model[[xname]]) - yhat <- sapply(yhatx,mean) - se <- sqrt(sum(w*(ybar - yhat)^2)/df) - if (ss == "auto") { - ss <- se - } else { - ss <- ss + yhatx <- split(object$fitted.values, object$model[[xname]]) + yhat <- sapply(yhatx, mean) + se <- sqrt(sum(w * (ybar - yhat)^2)/df) + + if (var.s == "auto") { + var.s <- se^2/ws } b1 <- object$coef[[xname]] ybarw <- sum(w * ybar)/sum(w) -# This is an adapted form of equation 8.28 (see package 
vignette) +# This is the adapted form of equation 8.28 (see package vignette) sxhats <- 1/b1 * sqrt( - ss^2/(ws * m) + + (var.s / m) + se^2 * (1/sum(w) + (ybars - ybarw)^2 * sum(w) / (b1^2 * (sum(w) * sum(w * x^2) - sum(w * x)^2))) diff --git a/R/lod.R b/R/lod.R index 54618c8..f5bbb7d 100644 --- a/R/lod.R +++ b/R/lod.R @@ -1,14 +1,14 @@ -lod <- function(object, ..., alpha = 0.05, beta = 0.05) +lod <- function(object, ..., alpha = 0.05, beta = 0.05, method = "default") { UseMethod("lod") } -lod.default <- function(object, ..., alpha = 0.05, beta = 0.05) +lod.default <- function(object, ..., alpha = 0.05, beta = 0.05, method = "default") { stop("lod is only implemented for univariate lm objects.") } -lod.lm <- function(object, ..., alpha = 0.05, beta = 0.05) +lod.lm <- function(object, ..., alpha = 0.05, beta = 0.05, method = "default") { if (length(object$weights) > 0) { stop(paste( @@ -24,23 +24,29 @@ lod.lm <- function(object, ..., alpha = 0.05, beta = 0.05) yname <- names(object$model)[[1]] newdata <- data.frame(0) names(newdata) <- xname - y0 <- predict(object, newdata, interval="prediction", - level = 1 - 2 * alpha ) + y0 <- predict(object, newdata, interval = "prediction", + level = 1 - 2 * alpha) yc <- y0[[1,"upr"]] - xc <- inverse.predict(object,yc)[["Prediction"]] - f <- function(x) - { - newdata <- data.frame(x) - names(newdata) <- xname - pi.y <- predict(object, newdata, interval = "prediction", + if (method == "din") { + y0.d <- predict(object, newdata, interval = "prediction", level = 1 - 2 * beta) - yd <- pi.y[[1,"lwr"]] - (yd - yc)^2 + deltay <- y0.d[[1, "upr"]] - y0.d[[1, "fit"]] + lod.y <- yc + deltay + lod.x <- inverse.predict(object, lod.y)$Prediction + } else { + f <- function(x) { + newdata <- data.frame(x) + names(newdata) <- xname + pi.y <- predict(object, newdata, interval = "prediction", + level = 1 - 2 * beta) + yd <- pi.y[[1,"lwr"]] + (yd - yc)^2 + } + lod.x <- optimize(f,interval=c(0,max(object$model[[xname]])))$minimum + newdata <- data.frame(x = lod.x) + names(newdata) <- xname + lod.y <- predict(object, newdata) } - lod.x <- optimize(f,interval=c(0,max(object$model[[xname]])))$minimum - newdata <- data.frame(x = lod.x) - names(newdata) <- xname - lod.y <- predict(object, newdata) lod <- list(lod.x, lod.y) names(lod) <- c(xname, yname) return(lod) diff --git a/R/loq.R b/R/loq.R index ee22d38..5776096 100644 --- a/R/loq.R +++ b/R/loq.R @@ -1,22 +1,32 @@ -loq <- function(object, ..., alpha = 0.05, k = 3, n = 1, w = "auto") +loq <- function(object, ..., alpha = 0.05, k = 3, n = 1, w.loq = "auto", + var.loq = "auto") { UseMethod("loq") } -loq.default <- function(object, ..., alpha = 0.05, k = 3, n = 1, w = "auto") +loq.default <- function(object, ..., alpha = 0.05, k = 3, n = 1, w.loq = "auto", + var.loq = "auto") { stop("loq is only implemented for univariate lm objects.") } -loq.lm <- function(object, ..., alpha = 0.05, k = 3, n = 1, w = "auto") +loq.lm <- function(object, ..., alpha = 0.05, k = 3, n = 1, w.loq = "auto", + var.loq = "auto") { + if (length(object$weights) > 0 && var.loq == "auto" && w.loq == "auto") { + stop(paste("If you are using a model from weighted regression,", + "you need to specify a reasonable approximation for the", + "weight (w.loq) or the variance (var.loq) at the", + "limit of quantification")) + } xname <- names(object$model)[[2]] yname <- names(object$model)[[1]] f <- function(x) { newdata <- data.frame(x = x) names(newdata) <- xname y <- predict(object, newdata) - p <- inverse.predict(object, rep(y, n), ws = w, alpha = alpha) + p 
<- inverse.predict(object, rep(y, n), ws = w.loq, + var.s = var.loq, alpha = alpha) (p[["Prediction"]] - k * p[["Confidence"]])^2 } tmp <- optimize(f,interval=c(0,max(object$model[[2]]))) diff --git a/demo/00Index b/demo/00Index deleted file mode 100644 index a548abc..0000000 --- a/demo/00Index +++ /dev/null @@ -1 +0,0 @@ -massart97ex8 Analysis of example 8 in Massart (1997) diff --git a/demo/massart97ex8.R b/demo/massart97ex8.R deleted file mode 100644 index 0a49ec2..0000000 --- a/demo/massart97ex8.R +++ /dev/null @@ -1,12 +0,0 @@ -data(massart97ex3) -attach(massart97ex3) -xi <- levels(factor(x)) -yx <- split(y,factor(x)) -ybari <- sapply(yx,mean) -si <- round(sapply(yx,sd),digits=2) -wi <- round(1/(si^2),digits=3) -weights <- wi[factor(x)] -m <- lm(y ~ x,w=weights) -inverse.predict(m, 15, ws = 1.67) -inverse.predict(m, 90, ws = 0.145) -#calplot(m) diff --git a/inst/doc/Makefile b/inst/doc/Makefile index 9c5cd3a..8eca69e 100644 --- a/inst/doc/Makefile +++ b/inst/doc/Makefile @@ -1,6 +1,6 @@ # Makefile for Sweave documents containing both Latex and R code # Author: Johannes Ranke -# Last Change: 2006 Mai 24 +# Last Change: 2006 Jun 23 # based on the Makefile of Nicholas Lewin-Koh # in turn based on work of Rouben Rostmaian # SVN: $Id: Makefile.rnoweb 62 2006-05-24 08:30:59Z ranke $ diff --git a/inst/doc/chemCal-001.pdf b/inst/doc/chemCal-001.pdf index 9ca7c06..9e77bb2 100644 --- a/inst/doc/chemCal-001.pdf +++ b/inst/doc/chemCal-001.pdf @@ -2,8 +2,8 @@ %ρ\r 1 0 obj << -/CreationDate (D:20060524104614) -/ModDate (D:20060524104614) +/CreationDate (D:20060623173000) +/ModDate (D:20060623173000) /Title (R Graphics Output) /Producer (R 2.3.1) /Creator (R) diff --git a/inst/doc/chemCal-002.pdf b/inst/doc/chemCal-002.pdf index 262f50a..44ab8b6 100644 --- a/inst/doc/chemCal-002.pdf +++ b/inst/doc/chemCal-002.pdf @@ -2,8 +2,8 @@ %ρ\r 1 0 obj << -/CreationDate (D:20060524104614) -/ModDate (D:20060524104614) +/CreationDate (D:20060623173002) +/ModDate (D:20060623173002) /Title (R Graphics Output) /Producer (R 2.3.1) /Creator (R) diff --git a/inst/doc/chemCal.Rnw b/inst/doc/chemCal.Rnw index afa5ad7..8cdc97c 100644 --- a/inst/doc/chemCal.Rnw +++ b/inst/doc/chemCal.Rnw @@ -8,15 +8,29 @@ \begin{document} \maketitle -When calibrating an analytical method, the first task is to generate -a suitable model. If we want to use the \texttt{chemCal} functions, we -will have to restrict ourselves to univariate, possibly weighted -\footnote{ -For the weighted case, the function \texttt{predict.lm} would have to be -adapted (Bug report PR\#8877), in order to allow for weights for the x values -used to predict the y values. This affects the functions \texttt{calplot} -and \texttt{lod}. -}, linear regression so far. +The \texttt{chemCal} package was first designed in the course of a lecture and lab +course on "analytics of organic trace contaminants" at the University of Bremen +from October to December 2004. In the fall of 2005, an e-mail exchange with +Ron Wehrens led to the belief that it would be desirable to implement the +inverse prediction method given in \cite{massart97} since it also covers the +case of weighted regression. Studies of the IUPAC orange book and of DIN 32645 +as well as publications by Currie and the Analytical Methods Committee of the +Royal Society of Chemistry and a nice paper by Castillo and Castells provided +further understanding of the matter. 
+ +At the moment, the package consists of four functions, working on univariate +linear models of class \texttt{lm} or \texttt{rlm}, plus two datasets for +validation. + +A \href{http://bugs.r-project.org/cgi-bin/R/wishlst-fulfilled?id=8877;user=guest}{bug +report (PR\#8877)} and the ensuing e-mail exchange on the r-devel mailing list about +prediction intervals from weighted regression entailed some further studies +on this subject. However, I have not yet encountered a proof or an explanation of the +formula cited below, so I cannot really confirm that Massart's method is correct. + +When calibrating an analytical method, the first task is to generate a suitable +model. If we want to use the \texttt{chemCal} functions, we will have to restrict +ourselves to univariate, possibly weighted, linear regression so far. Once such a model has been created, the calibration can be graphically shown by using the \texttt{calplot} function: @@ -24,8 +38,7 @@ shown by using the \texttt{calplot} function: <>= library(chemCal) data(massart97ex3) -attach(massart97ex3) -m0 <- lm(y ~ x) +m0 <- lm(y ~ x, data = massart97ex3) calplot(m0) @ @@ -41,28 +54,26 @@ Therefore, in Example 8 in \cite{massart97} weighted regression is proposed which can be reproduced by <<>>= -yx <- split(y,x) -ybar <- sapply(yx,mean) -s <- round(sapply(yx,sd),digits=2) -w <- round(1/(s^2),digits=3) +attach(massart97ex3) +yx <- split(y, x) +ybar <- sapply(yx, mean) +s <- round(sapply(yx, sd), digits = 2) +w <- round(1 / (s^2), digits = 3) weights <- w[factor(x)] -m <- lm(y ~ x,w=weights) +m <- lm(y ~ x, w = weights) @ -Unfortunately, \texttt{calplot} does not work on weighted linear models, -as noted in the footnote above. - If we now want to predict a new x value from measured y values, we use the \texttt{inverse.predict} function: <<>>= -inverse.predict(m,15,ws=1.67) +inverse.predict(m, 15, ws=1.67) inverse.predict(m, 90, ws = 0.145) @ The weight \texttt{ws} assigned to the measured y value has to be -given by the user in the case of weighted regression. By default, -the mean of the weights used in the linear regression is used. +given by the user in the case of weighted regression, or alternatively, +the approximate variance \texttt{var.s} at this location. \section*{Theory for \texttt{inverse.predict}} Equation 8.28 in \cite{massart97} gives a general equation for predicting the @@ -104,6 +115,10 @@ s_{\hat{x_s}} = \frac{1}{b_1} \sqrt{\frac{{s_s}^2}{w_s m} + {{b_1}^2 \left( \sum{w_i} \sum{w_i {x_i}^2} - {\left( \sum{ w_i x_i } \right)}^2 \right) } \right) } \end{equation} +where I interpret $\frac{{s_s}^2}{w_s}$ as an estimator of the variance at location +$\hat{x_s}$, which can be replaced by a user-specified value using the argument +\texttt{var.s} of the function \texttt{inverse.predict}. 
+ \begin{thebibliography}{1} \bibitem{massart97} Massart, L.M, Vandenginste, B.G.M., Buydens, L.M.C., De Jong, S., Lewi, P.J., diff --git a/inst/doc/chemCal.aux b/inst/doc/chemCal.aux index 0eb51cc..20bfc98 100644 --- a/inst/doc/chemCal.aux +++ b/inst/doc/chemCal.aux @@ -11,6 +11,7 @@ \global \let \hyper@last\relax \fi +\citation{massart97} \citation{massart97} \citation{massart97} \citation{massart97} diff --git a/inst/doc/chemCal.log b/inst/doc/chemCal.log index 237b975..fc0ba0f 100644 --- a/inst/doc/chemCal.log +++ b/inst/doc/chemCal.log @@ -1,4 +1,4 @@ -This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=pdflatex 2006.4.8) 24 MAY 2006 10:46 +This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=pdflatex 2006.5.30) 23 JUN 2006 17:30 entering extended mode **chemCal.tex (./chemCal.tex @@ -297,65 +297,62 @@ LaTeX Font Info: External font `cmex10' loaded for size (Font) <8> on input line 11. LaTeX Font Info: External font `cmex10' loaded for size (Font) <6> on input line 11. -LaTeX Font Info: Try loading font information for T1+aett on input line 13. +LaTeX Font Info: Try loading font information for T1+aett on input line 12. (/usr/share/texmf-tetex/tex/latex/ae/t1aett.fd File: t1aett.fd 1997/11/16 Font definitions for T1/aett. ) -LaTeX Font Info: External font `cmex10' loaded for size -(Font) <7> on input line 15. -LaTeX Font Info: External font `cmex10' loaded for size -(Font) <5> on input line 15. - File: chemCal-001.pdf Graphic file (type pdf) [1 -{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map} <./chemCal-001.pdf>] - +{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}] + File: chemCal-002.pdf Graphic file (type pdf) - -LaTeX Font Info: Try loading font information for TS1+aett on input line 75. + [2 <./chemCal-001.pdf>] +LaTeX Font Info: Try loading font information for TS1+aett on input line 86. -LaTeX Font Info: No file TS1aett.fd. on input line 75. +LaTeX Font Info: No file TS1aett.fd. on input line 86. LaTeX Font Warning: Font shape `TS1/aett/m/n' undefined (Font) using `TS1/cmr/m/n' instead -(Font) for symbol `textasciigrave' on input line 75. +(Font) for symbol `textasciigrave' on input line 86. -[2 <./chemCal-002.pdf>] +[3 <./chemCal-002.pdf>] LaTeX Font Warning: Font shape `T1/aett/bx/n' undefined -(Font) using `T1/aett/m/n' instead on input line 106. +(Font) using `T1/aett/m/n' instead on input line 117. -[3] [4] (./chemCal.aux) +LaTeX Font Info: External font `cmex10' loaded for size +(Font) <7> on input line 119. +LaTeX Font Info: External font `cmex10' loaded for size +(Font) <5> on input line 119. +[4] [5] (./chemCal.aux) LaTeX Font Warning: Some font shapes were not available, defaults substituted. 
) Here is how much of TeX's memory you used: - 3772 strings out of 94500 - 51588 string characters out of 1175772 + 3756 strings out of 94499 + 51475 string characters out of 1175813 97245 words of memory out of 1000000 - 6857 multiletter control sequences out of 10000+50000 - 29608 words of font info for 66 fonts, out of 500000 for 2000 + 6853 multiletter control sequences out of 10000+50000 + 22302 words of font info for 54 fonts, out of 500000 for 2000 580 hyphenation exceptions out of 8191 - 35i,8n,21p,255b,279s stack positions out of 1500i,500n,5000p,200000b,5000s + 35i,7n,21p,255b,283s stack positions out of 1500i,500n,5000p,200000b,5000s PDF statistics: - 95 PDF objects out of 300000 + 93 PDF objects out of 300000 12 named destinations out of 131072 27 words of extra memory for PDF output out of 65536 - < -/usr/share/texmf-tetex/fonts/type1/bluesky/cm/cmsltt10.pfb> -Output written on chemCal.pdf (4 pages, 133895 bytes). + +Output written on chemCal.pdf (5 pages, 123681 bytes). diff --git a/inst/doc/chemCal.pdf b/inst/doc/chemCal.pdf index 45a50eb..027e053 100644 Binary files a/inst/doc/chemCal.pdf and b/inst/doc/chemCal.pdf differ diff --git a/inst/doc/chemCal.tex b/inst/doc/chemCal.tex index e26172f..0469848 100644 --- a/inst/doc/chemCal.tex +++ b/inst/doc/chemCal.tex @@ -9,15 +9,29 @@ \begin{document} \maketitle -When calibrating an analytical method, the first task is to generate -a suitable model. If we want to use the \texttt{chemCal} functions, we -will have to restrict ourselves to univariate, possibly weighted -\footnote{ -For the weighted case, the function \texttt{predict.lm} would have to be -adapted (Bug report PR\#8877), in order to allow for weights for the x values -used to predict the y values. This affects the functions \texttt{calplot} -and \texttt{lod}. -}, linear regression so far. +The \texttt{chemCal} package was first designed in the course of a lecture and lab +course on "analytics of organic trace contaminants" at the University of Bremen +from October to December 2004. In the fall of 2005, an e-mail exchange with +Ron Wehrens led to the belief that it would be desirable to implement the +inverse prediction method given in \cite{massart97} since it also covers the +case of weighted regression. Studies of the IUPAC orange book and of DIN 32645 +as well as publications by Currie and the Analytical Methods Committee of the +Royal Society of Chemistry and a nice paper by Castillo and Castells provided +further understanding of the matter. + +At the moment, the package consists of four functions, working on univariate +linear models of class \texttt{lm} or \texttt{rlm}, plus two datasets for +validation. + +A \href{http://bugs.r-project.org/cgi-bin/R/wishlst-fulfilled?id=8877;user=guest}{bug +report (PR\#8877)} and the ensuing e-mail exchange on the r-devel mailing list about +prediction intervals from weighted regression entailed some further studies +on this subject. However, I have not yet encountered a proof or an explanation of the +formula cited below, so I cannot really confirm that Massart's method is correct. + +When calibrating an analytical method, the first task is to generate a suitable +model. If we want to use the \texttt{chemCal} functions, we will have to restrict +ourselves to univariate, possibly weighted, linear regression so far. 
Once such a model has been created, the calibration can be graphically shown by using the \texttt{calplot} function: @@ -26,8 +40,7 @@ shown by using the \texttt{calplot} function: \begin{Sinput} > library(chemCal) > data(massart97ex3) -> attach(massart97ex3) -> m0 <- lm(y ~ x) +> m0 <- lm(y ~ x, data = massart97ex3) > calplot(m0) \end{Sinput} \end{Schunk} @@ -49,6 +62,7 @@ is proposed which can be reproduced by \begin{Schunk} \begin{Sinput} +> attach(massart97ex3) > yx <- split(y, x) > ybar <- sapply(yx, mean) > s <- round(sapply(yx, sd), digits = 2) @@ -58,9 +72,6 @@ is proposed which can be reproduced by \end{Sinput} \end{Schunk} -Unfortunately, \texttt{calplot} does not work on weighted linear models, -as noted in the footnote above. - If we now want to predict a new x value from measured y values, we use the \texttt{inverse.predict} function: @@ -100,8 +111,8 @@ $`Confidence Limits` \end{Schunk} The weight \texttt{ws} assigned to the measured y value has to be -given by the user in the case of weighted regression. By default, -the mean of the weights used in the linear regression is used. +given by the user in the case of weighted regression, or alternatively, +the approximate variance \texttt{var.s} at this location. \section*{Theory for \texttt{inverse.predict}} Equation 8.28 in \cite{massart97} gives a general equation for predicting the @@ -143,6 +154,10 @@ s_{\hat{x_s}} = \frac{1}{b_1} \sqrt{\frac{{s_s}^2}{w_s m} + {{b_1}^2 \left( \sum{w_i} \sum{w_i {x_i}^2} - {\left( \sum{ w_i x_i } \right)}^2 \right) } \right) } \end{equation} +where I interpret $\frac{{s_s}^2}{w_s}$ as an estimator of the variance at location +$\hat{x_s}$, which can be replaced by a user-specified value using the argument +\texttt{var.s} of the function \texttt{inverse.predict}. + \begin{thebibliography}{1} \bibitem{massart97} Massart, L.M, Vandenginste, B.G.M., Buydens, L.M.C., De Jong, S., Lewi, P.J., diff --git a/man/calplot.lm.Rd b/man/calplot.lm.Rd index de63022..bf3f616 100644 --- a/man/calplot.lm.Rd +++ b/man/calplot.lm.Rd @@ -5,12 +5,11 @@ \title{Plot calibration graphs from univariate linear models} \description{ Produce graphics of calibration data, the fitted model as well - as prediction and confidence bands. + as confidence, and, for unweighted regression, prediction bands. } \usage{ - calplot(object, xlim = c("auto","auto"), ylim = c("auto","auto"), - xlab = "Concentration", ylab = "Response", alpha=0.05, - varfunc = NULL) + calplot(object, xlim = c("auto", "auto"), ylim = c("auto", "auto"), + xlab = "Concentration", ylab = "Response", alpha=0.05, varfunc = NULL) } \arguments{ \item{object}{ @@ -40,7 +39,8 @@ } \value{ A plot of the calibration data, of your fitted model as well as lines showing - the confidence limits as well as the prediction limits. + the confidence limits. Prediction limits are only shown for models from + unweighted regression. 
} \note{ Prediction bands for models from weighted linear regression require weights @@ -51,7 +51,7 @@ } \examples{ data(massart97ex3) -m <- lm(y ~ x, data=massart97ex3) +m <- lm(y ~ x, data = massart97ex3) calplot(m) } \author{ diff --git a/man/din32645.Rd b/man/din32645.Rd index 94486c4..cacbf07 100644 --- a/man/din32645.Rd +++ b/man/din32645.Rd @@ -11,39 +11,44 @@ } \examples{ data(din32645) -m <- lm(y ~ x, data=din32645) +m <- lm(y ~ x, data = din32645) calplot(m) -(prediction <- inverse.predict(m,3500,alpha=0.01)) -# This should give 0.07434 according to Dintest test data, as -# collected from Procontrol 3.1 (isomehr GmbH) + +## Prediction of x with confidence interval +(prediction <- inverse.predict(m, 3500, alpha = 0.01)) + +# This should give 0.07434 according to test data from Dintest, which +# was collected from Procontrol 3.1 (isomehr GmbH) in this case round(prediction$Confidence,5) -# According to Dintest test data, we should get 0.0698 for the critical value +## Critical value: +(crit <- lod(m, alpha = 0.01, beta = 0.5)) + +# According to DIN 32645, we should get 0.07 for the critical value # (decision limit, "Nachweisgrenze") -(lod <- lod(m, alpha = 0.01, beta = 0.5)) -round(lod$x,4) +round(crit$x, 2) +# and according to Dintest test data, we should get 0.0698 from +round(crit$x, 4) +## Limit of detection (smallest detectable value given alpha and beta) # In German, the smallest detectable value is the "Erfassungsgrenze", and we -# should get 0.140 according to Dintest test data, but with chemCal, we can't -# reproduce this, -lod(m, alpha = 0.01, beta = 0.01) -# except by using an equivalent to the approximation -# xD = 2 * Sc / A (Currie 1999, p. 118, or Orange Book, Chapter 18.4.3.7) -lod.approx <- 2 * lod$x -round(lod.approx, digits=3) -# which seems to be the pragmatic definition in DIN 32645, as judging from -# the Dintest test data. - -# This accords to the test data from Dintest again, except for the last digit -# of the value cited for Procontrol 3.1 (0.2121) +# should get 0.14 according to DIN, which we achieve by using the method +# described in it: +lod.din <- lod(m, alpha = 0.01, beta = 0.01, method = "din") +round(lod.din$x, 2) + +## Limit of quantification +# This accords to the test data coming with the test data from Dintest again, +# except for the last digits of the value cited for Procontrol 3.1 (0.2121) (loq <- loq(m, alpha = 0.01)) round(loq$x,4) + # A similar value is obtained using the approximation # LQ = 3.04 * LC (Currie 1999, p. 120) 3.04 * lod(m,alpha = 0.01, beta = 0.5)$x } \references{ - DIN 32645 (equivalent to ISO 11843) + DIN 32645 (equivalent to ISO 11843), Beuth Verlag, Berlin, 1994 Dintest. Plugin for MS Excel for evaluations of calibration data. Written by Georg Schmitt, University of Heidelberg. diff --git a/man/inverse.predict.Rd b/man/inverse.predict.Rd index 5be0250..925f3e9 100644 --- a/man/inverse.predict.Rd +++ b/man/inverse.predict.Rd @@ -5,7 +5,7 @@ \alias{inverse.predict.default} \title{Predict x from y for a linear calibration} \usage{inverse.predict(object, newdata, \dots, - ws, alpha=0.05, ss = "auto") + ws, alpha=0.05, var.s = "auto") } \arguments{ \item{object}{ @@ -21,15 +21,17 @@ future implementations. } \item{ws}{ - The weight attributed to the sample. The default is to take the - mean of the weights in the model, if there are any. + The weight attributed to the sample. This argument is obligatory + if \code{object} has weights. } \item{alpha}{ The error tolerance level for the confidence interval to be reported. 
} - \item{ss}{ - The estimated standard error of the sample measurements. The - default is to take the residual standard error from the calibration. + \item{var.s}{ + The estimated variance of the sample measurements. The default is to take + the residual standard error from the calibration and to adjust it + using \code{ws}, if applicable. This means that \code{var.s} + overrides \code{ws}. } } \value{ @@ -59,12 +61,13 @@ \examples{ data(massart97ex3) attach(massart97ex3) - yx <- split(y,factor(x)) - s <- round(sapply(yx,sd),digits=2) - w <- round(1/(s^2),digits=3) + yx <- split(y, x) + ybar <- sapply(yx, mean) + s <- round(sapply(yx, sd), digits = 2) + w <- round(1 / (s^2), digits = 3) weights <- w[factor(x)] - m <- lm(y ~ x,w=weights) + m <- lm(y ~ x, w = weights) - inverse.predict(m,15,ws = 1.67) + inverse.predict(m, 15, ws = 1.67) # 5.9 +- 2.5 } \keyword{manip} diff --git a/man/lod.Rd b/man/lod.Rd index c2fe4e9..055074c 100644 --- a/man/lod.Rd +++ b/man/lod.Rd @@ -5,7 +5,7 @@ \alias{lod.default} \title{Estimate a limit of detection (LOD)} \usage{ - lod(object, \dots, alpha = 0.05, beta = 0.05) + lod(object, \dots, alpha = 0.05, beta = 0.05, method = "default") } \arguments{ \item{object}{ @@ -24,10 +24,18 @@ \item{beta}{ The error tolerance beta for the detection limit. } + \item{method}{ + The default method uses a prediction interval at the LOD + for the estimation of the LOD, which obviously requires + iteration. This is described for example in Massart, p. 432 ff. + The \dQuote{din} method uses the prediction interval at + x = 0 as an approximation. + } } \value{ A list containig the corresponding x and y values of the estimated limit of - detection of a model used for calibration. } + detection of a model used for calibration. +} \description{ The decision limit (German: Nachweisgrenze) is defined as the signal or analyte concentration that is significantly different from the blank signal @@ -39,7 +47,7 @@ one-sided significance test). } \note{ - - The default values for alpha and beta are recommended by IUPAC. + - The default values for alpha and beta are the ones recommended by IUPAC. - The estimation of the LOD in terms of the analyte amount/concentration xD from the LOD in the signal domain SD is done by simply inverting the calibration function (i.e. assuming a known calibration function). @@ -68,8 +76,8 @@ # The critical value (decision limit, German Nachweisgrenze) can be obtained # by using beta = 0.5: lod(m, alpha = 0.01, beta = 0.5) - # or approximated by - 2 * lod(m, alpha = 0.01, beta = 0.5)$x - # for the case of known, constant variance (homoscedastic data) +} +\seealso{ + Examples for \code{\link{din32645}} } \keyword{manip} diff --git a/man/loq.Rd b/man/loq.Rd index 4850487..0250098 100644 --- a/man/loq.Rd +++ b/man/loq.Rd @@ -5,14 +5,17 @@ \alias{loq.default} \title{Estimate a limit of quantification (LOQ)} \usage{ - loq(object, \dots, alpha = 0.05, k = 3, n = 1, w = "auto") + loq(object, \dots, alpha = 0.05, k = 3, n = 1, w.loq = "auto", + var.loq = "auto") } \arguments{ \item{object}{ A univariate model object of class \code{\link{lm}} or \code{\link[MASS:rlm]{rlm}} with model formula \code{y ~ x} or \code{y ~ x - 1}, - optionally from a weighted regression. + optionally from a weighted regression. If weights are specified + in the model, either \code{w.loq} or \code{var.loq} have to + be specified. } \item{alpha}{ The error tolerance for the prediction of x values in the calculation. 
@@ -29,53 +32,46 @@ The number of replicate measurements for which the LOQ should be specified. } - \item{w}{ + \item{w.loq}{ The weight that should be attributed to the LOQ. Defaults to one for unweighted regression, and to the mean of the weights for weighted regression. See \code{\link{massart97ex3}} for an example how to take advantage of knowledge about the variance function. } + \item{var.loq}{ + The approximate variance at the LOQ. The default value is + calculated from the model. + } } \value{ The estimated limit of quantification for a model used for calibration. } \description{ - A useful operationalisation of a limit of quantification is simply the - solution of the equation + The limit of quantification is the x value, where the relative error + of the quantification given the calibration model reaches a prespecified + value 1/k. Thus, it is the solution of the equation \deqn{L = k c(L)}{L = k * c(L)} - where c(L) is half of the length of the confidence interval at the limit L as - estimated by \code{\link{inverse.predict}}. By virtue of this formula, the - limit of detection is the x value, where the relative error - of the quantification with the given calibration model is 1/k. + where c(L) is half of the length of the confidence interval at the limit L + (DIN 32645, equivalent to ISO 11843). c(L) is internally estimated by + \code{\link{inverse.predict}}, and L is obtained by iteration. } \note{ - IUPAC recommends to base the LOQ on the standard deviation of the - signal where x = 0. The approach taken here is to my knowledge - original to the chemCal package. + - IUPAC recommends to base the LOQ on the standard deviation of the signal + where x = 0. + - The calculation of a LOQ based on weighted regression is non-standard + and therefore not tested. Feedback is welcome. } \examples{ data(massart97ex3) attach(massart97ex3) - m0 <- lm(y ~ x) - loq(m0) - - # Now we use a weighted regression - yx <- split(y,factor(x)) - s <- round(sapply(yx,sd),digits=2) - w <- round(1/(s^2),digits=3) - weights <- w[factor(x)] - mw <- lm(y ~ x,w=weights) - loq(mw) - - # In order to define the weight at the loq, we can use - # the variance function 1/y for the model - mwy <- lm(y ~ x, w = 1/y) + m <- lm(y ~ x) + loq(m) - # Let's do this with one iteration only - loq(mwy, w = 1 / predict(mwy,list(x = loq(mwy)$x))) - - # We can get better by doing replicate measurements - loq(mwy, n = 3, w = 1 / predict(mwy,list(x = loq(mwy)$x))) + # We can get better by using replicate measurements + loq(m, n = 3) +} +\seealso{ + Examples for \code{\link{din32645}} } \keyword{manip} diff --git a/man/massart97ex3.Rd b/man/massart97ex3.Rd index eb00e79..e7cd383 100644 --- a/man/massart97ex3.Rd +++ b/man/massart97ex3.Rd @@ -3,7 +3,7 @@ \alias{massart97ex3} \title{Calibration data from Massart et al. (1997), example 3} \description{ - Sample dataset to test the package. + Sample dataset from p. 188 to test the package. } \usage{data(massart97ex3)} \format{ @@ -11,37 +11,42 @@ observations of y for each level. 
} \examples{ -data(massart97ex3) -attach(massart97ex3) -yx <- split(y,x) -ybar <- sapply(yx,mean) -s <- round(sapply(yx,sd),digits=2) -w <- round(1/(s^2),digits=3) -weights <- w[factor(x)] -m <- lm(y ~ x,w=weights) -# The following concords with the book -inverse.predict(m, 15, ws = 1.67) -inverse.predict(m, 90, ws = 0.145) + data(massart97ex3) + attach(massart97ex3) + yx <- split(y, x) + ybar <- sapply(yx, mean) + s <- round(sapply(yx, sd), digits = 2) + w <- round(1 / (s^2), digits = 3) + weights <- w[factor(x)] + m <- lm(y ~ x, w = weights) + calplot(m) -# Some of the following examples are commented out, because the require -# prediction intervals from predict.lm for weighted models, which is not -# available in R at the moment. + # The following concords with the book p. 200 + inverse.predict(m, 15, ws = 1.67) # 5.9 +- 2.5 + inverse.predict(m, 90, ws = 0.145) # 44.1 +- 7.9 -#calplot(m) + # The LOD is only calculated for models from unweighted regression + # with this version of chemCal + m0 <- lm(y ~ x) + lod(m0) -m0 <- lm(y ~ x) -lod(m0) -#lod(m) + # Limit of quantification from unweighted regression + m0 <- lm(y ~ x) + loq(m0) -# Now we want to take advantage of the lower weights at lower y values -#m2 <- lm(y ~ x, w = 1/y) -# To get a reasonable weight for the lod, we need to estimate it and predict -# a y value for it -#yhat.lod <- predict(m,data.frame(x = lod(m2))) -#lod(m2,w=1/yhat.lod,k=3) + # For calculating the limit of quantification from a model from weighted + # regression, we need to supply weights, internally used for inverse.predict + # If we are not using a variance function, we can use the weight from + # the above example as a first approximation (x = 15 is close to our + # loq approx 14 from above). + loq(m, w.loq = 1.67) + # The weight for the loq should therefore be derived at x = 7.3 instead + # of 15, but the graphical procedure of Massart (p. 201) to derive the + # variances on which the weights are based is quite inaccurate anyway. } \source{ - Massart, L.M, Vandenginste, B.G.M., Buydens, L.M.C., De Jong, S., Lewi, P.J., - Smeyers-Verbeke, J. (1997) Handbook of Chemometrics and Qualimetrics: Part A, p. 188 + Massart, L.M, Vandenginste, B.G.M., Buydens, L.M.C., De Jong, S., Lewi, P.J., + Smeyers-Verbeke, J. (1997) Handbook of Chemometrics and Qualimetrics: Part A, + Chapter 8. } \keyword{datasets} diff --git a/tests/din32645.R b/tests/din32645.R index dc0aee6..e5ffed7 100644 --- a/tests/din32645.R +++ b/tests/din32645.R @@ -1,7 +1,7 @@ -library(chemCal) +require(chemCal) data(din32645) -m <- lm(y ~ x, data=din32645) -inverse.predict(m,3500,alpha=0.01) +m <- lm(y ~ x, data = din32645) +inverse.predict(m, 3500, alpha = 0.01) lod <- lod(m, alpha = 0.01, beta = 0.5) lod(m, alpha = 0.01, beta = 0.01) loq <- loq(m, alpha = 0.01) diff --git a/tests/din32645.Rout.save b/tests/din32645.Rout.save index 10cd1ab..c5ed5a7 100644 --- a/tests/din32645.Rout.save +++ b/tests/din32645.Rout.save @@ -1,6 +1,6 @@ R : Copyright 2006, The R Foundation for Statistical Computing -Version 2.3.0 (2006-04-24) +Version 2.3.1 (2006-06-01) ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. @@ -15,10 +15,12 @@ Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. 
-> library(chemCal) +> require(chemCal) +Loading required package: chemCal +[1] TRUE > data(din32645) -> m <- lm(y ~ x, data=din32645) -> inverse.predict(m,3500,alpha=0.01) +> m <- lm(y ~ x, data = din32645) +> inverse.predict(m, 3500, alpha = 0.01) $Prediction [1] 0.1054792 diff --git a/tests/massart97.R b/tests/massart97.R index 7170ec4..58119d9 100644 --- a/tests/massart97.R +++ b/tests/massart97.R @@ -1,12 +1,19 @@ -library(chemCal) +require(chemCal) data(massart97ex3) attach(massart97ex3) -yx <- split(y,x) -ybar <- sapply(yx,mean) -s <- round(sapply(yx,sd),digits=2) -w <- round(1/(s^2),digits=3) +yx <- split(y, x) +ybar <- sapply(yx, mean) +s <- round(sapply(yx, sd), digits = 2) +w <- round(1 / (s^2), digits = 3) weights <- w[factor(x)] -m <- lm(y ~ x,w=weights) -# The following concords with the book -inverse.predict(m, 15, ws = 1.67) -inverse.predict(m, 90, ws = 0.145) +m <- lm(y ~ x, w = weights) +#calplot(m) + +inverse.predict(m, 15, ws = 1.67) # 5.9 +- 2.5 +inverse.predict(m, 90, ws = 0.145) # 44.1 +- 7.9 + +m0 <- lm(y ~ x) +lod(m0) + +loq(m0) +loq(m, w.loq = 1.67) diff --git a/tests/massart97.Rout.save b/tests/massart97.Rout.save index ae50275..9386a11 100644 --- a/tests/massart97.Rout.save +++ b/tests/massart97.Rout.save @@ -1,6 +1,6 @@ R : Copyright 2006, The R Foundation for Statistical Computing -Version 2.3.0 (2006-04-24) +Version 2.3.1 (2006-06-01) ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. @@ -15,17 +15,20 @@ Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. -> library(chemCal) +> require(chemCal) +Loading required package: chemCal +[1] TRUE > data(massart97ex3) > attach(massart97ex3) -> yx <- split(y,x) -> ybar <- sapply(yx,mean) -> s <- round(sapply(yx,sd),digits=2) -> w <- round(1/(s^2),digits=3) +> yx <- split(y, x) +> ybar <- sapply(yx, mean) +> s <- round(sapply(yx, sd), digits = 2) +> w <- round(1 / (s^2), digits = 3) > weights <- w[factor(x)] -> m <- lm(y ~ x,w=weights) -> # The following concords with the book -> inverse.predict(m, 15, ws = 1.67) +> m <- lm(y ~ x, w = weights) +> #calplot(m) +> +> inverse.predict(m, 15, ws = 1.67) # 5.9 +- 2.5 $Prediction [1] 5.865367 @@ -38,7 +41,7 @@ $Confidence $`Confidence Limits` [1] 3.387082 8.343652 -> inverse.predict(m, 90, ws = 0.145) +> inverse.predict(m, 90, ws = 0.145) # 44.1 +- 7.9 $Prediction [1] 44.06025 @@ -52,3 +55,27 @@ $`Confidence Limits` [1] 36.20523 51.91526 > +> m0 <- lm(y ~ x) +> lod(m0) +$x +[1] 5.406637 + +$y +[1] 13.63822 + +> +> loq(m0) +$x +[1] 13.97767 + +$y +[1] 30.62355 + +> loq(m, w.loq = 1.67) +$x +[1] 7.346231 + +$y +[1] 17.90784 + +> -- cgit v1.2.1
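
Usage sketch (not part of the patch): the R snippet below only recombines calls that already appear in the examples and tests above, to show in one place how the arguments introduced by this commit (method = "din" for lod(), w.loq for loq(), ws for inverse.predict() on a weighted model) are meant to be called. It assumes a chemCal installation built from this patch; the datasets din32645 and massart97ex3 ship with the package, and the weight w.loq = 1.67 is the approximation used in the tests above, not a universally valid value.

require(chemCal)

# Critical value (decision limit, "Nachweisgrenze") and the DIN 32645
# limit of detection, using the new method = "din" argument of lod()
data(din32645)
m.din <- lm(y ~ x, data = din32645)
lod(m.din, alpha = 0.01, beta = 0.5)                   # decision limit
lod(m.din, alpha = 0.01, beta = 0.01, method = "din")  # "Erfassungsgrenze"

# Weighted calibration: inverse.predict() needs ws (or var.s), and
# loq() now stops unless w.loq or var.loq is given
data(massart97ex3)
yx <- split(massart97ex3$y, massart97ex3$x)
s <- round(sapply(yx, sd), digits = 2)
w <- round(1 / (s^2), digits = 3)
weights <- w[factor(massart97ex3$x)]
m.w <- lm(y ~ x, data = massart97ex3, weights = weights)
inverse.predict(m.w, 15, ws = 1.67)   # 5.9 +- 2.5, as in Massart (1997)
loq(m.w, w.loq = 1.67)                # weight approximated from the value at x = 15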