From f381f9a6a8a47b89ec25cd627833a7248da7932b Mon Sep 17 00:00:00 2001
From: ranke
Date: Tue, 23 May 2006 07:33:22 +0000
Subject: Don't do calplot and lod for linear models from weighted regression
 any more, since this is not supported (PR#8877).

git-svn-id: http://kriemhild.uft.uni-bremen.de/svn/chemCal@13 5fad18fb-23f0-0310-ab10-e59a3bee62b4
---
 DESCRIPTION               |  4 ++--
 R/calplot.R               | 52 +++++++++++++++++++++++++++++++++-----------
 R/inverse.predict.lm.R    |  2 +-
 R/lod.R                   | 19 +++++++++++++---
 R/loq.R                   | 16 +++++++++++---
 TODO                      |  5 +++++
 inst/doc/chemCal-001.pdf  |  4 ++--
 inst/doc/chemCal.Rnw      | 13 ++++++-----
 inst/doc/chemCal.log      |  4 ++--
 inst/doc/chemCal.pdf      | Bin 107306 -> 107447 bytes
 man/calplot.lm.Rd         |  4 +++-
 man/din32645.Rd           | 37 +++++++++++++++++++++++--------
 man/lod.Rd                | 27 ++++++++++++++++++-----
 man/loq.Rd                |  9 ++++++--
 tests/din32645.R          |  7 ++++++
 tests/din32645.Rout.save  | 43 ++++++++++++++++++++++++++++++++++++
 tests/massart97.R         | 12 +++++++++++
 tests/massart97.Rout.save | 54 ++++++++++++++++++++++++++++++++++++++++++++++
 18 files changed, 262 insertions(+), 50 deletions(-)
 create mode 100644 TODO
 create mode 100644 tests/din32645.R
 create mode 100644 tests/din32645.Rout.save
 create mode 100644 tests/massart97.R
 create mode 100644 tests/massart97.Rout.save

diff --git a/DESCRIPTION b/DESCRIPTION
index 8dc884e..689bbca 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: chemCal
-Version: 0.05-11
-Date: 2006-05-16
+Version: 0.05-13
+Date: 2006-05-17
 Title: Calibration functions for analytical chemistry
 Author: Johannes Ranke
 Maintainer: Johannes Ranke
diff --git a/R/calplot.R b/R/calplot.R
index 2deed5a..753d333 100644
--- a/R/calplot.R
+++ b/R/calplot.R
@@ -1,21 +1,36 @@
-calplot <- function(object, xlim = "auto", ylim = "auto",
-  xlab = "Concentration", ylab = "Response", alpha=0.05)
+calplot <- function(object,
+  xlim = c("auto","auto"), ylim = c("auto","auto"),
+  xlab = "Concentration", ylab = "Response", alpha=0.05,
+  varfunc = NULL)
 {
   UseMethod("calplot")
 }
 
-calplot.default <- function(object, xlim = "auto", ylim = "auto",
-  xlab = "Concentration", ylab = "Response", alpha=0.05)
+calplot.default <- function(object,
+  xlim = c("auto","auto"), ylim = c("auto","auto"),
+  xlab = "Concentration", ylab = "Response",
+  alpha=0.05, varfunc = NULL)
 {
   stop("Calibration plots only implemented for univariate lm objects.")
 }
 
-calplot.lm <- function(object, xlim = "auto", ylim = "auto",
-  xlab = "Concentration", ylab = "Response", alpha=0.05)
+calplot.lm <- function(object,
+  xlim = c("auto","auto"), ylim = c("auto","auto"),
+  xlab = "Concentration", ylab = "Response", alpha=0.05,
+  varfunc = NULL)
 {
   if (length(object$coef) > 2)
     stop("More than one independent variable in your model - not implemented")
+  if (length(object$weights) > 0) {
+    stop(paste(
+      "\nConfidence and prediction intervals for weighted linear models require",
+      "weights for the x values from which the predictions are to be generated.",
+      "This is not supported by the internally used predict.lm method.",
+      sep = "\n"
+    ))
+  }
+
   if (alpha <= 0 | alpha >= 1)
     stop("Alpha should be between 0 and 1 (exclusive)")
 
@@ -23,18 +38,29 @@ calplot.lm <- function(object, xlim = "auto", ylim = "auto",
   level <- 1 - alpha
   y <- m$model[[1]]
   x <- m$model[[2]]
-  newdata <- list(x = seq(0,max(x),length=250))
+  if (xlim[1] == "auto") xlim[1] <- 0
+  if (xlim[2] == "auto") xlim[2] <- max(x)
+  newdata <- list(
+    x = seq(from = xlim[1], to = xlim[2], length=250))
   names(newdata) <- names(m$model)[[2]]
-  pred.lim <- predict(m, newdata, interval = "prediction",level=level)
-  conf.lim <- predict(m, newdata, interval = "confidence",level=level)
-  if (xlim == "auto") xlim = c(0,max(x))
-  if (ylim == "auto") ylim = range(c(pred.lim,y,0))
+  if (is.null(varfunc)) {
+    varfunc <- if (length(m$weights)) {
+      function(variable) mean(m$weights)
+    } else function(variable) rep(1,250)
+  }
+  pred.lim <- predict(m, newdata, interval = "prediction",
+    level=level, weights.newdata = varfunc(m))
+  conf.lim <- predict(m, newdata, interval = "confidence",
+    level=level)
+  yrange.auto <- range(c(0,pred.lim))
+  if (ylim[1] == "auto") ylim[1] <- yrange.auto[1]
+  if (ylim[2] == "auto") ylim[2] <- yrange.auto[2]
   plot(1,
     type = "n",
     xlab = xlab,
     ylab = ylab,
-    xlim = xlim,
-    ylim = ylim
+    xlim = as.numeric(xlim),
+    ylim = as.numeric(ylim)
   )
   points(x,y, pch = 21, bg = "yellow")
   matlines(newdata[[1]], pred.lim, lty = c(1, 4, 4),
diff --git a/R/inverse.predict.lm.R b/R/inverse.predict.lm.R
index e5f014c..8352c26 100644
--- a/R/inverse.predict.lm.R
+++ b/R/inverse.predict.lm.R
@@ -59,7 +59,7 @@ inverse.predict.rlm <- function(object, newdata, ...,
   yx <- split(object$model[[yname]],object$model[[xname]])
   n <- length(yx)
-  df <- n - length(objects$coef)
+  df <- n - length(object$coef)
   x <- as.numeric(names(yx))
   ybar <- sapply(yx,mean)
   yhatx <- split(object$fitted.values,object$model[[xname]])
diff --git a/R/lod.R b/R/lod.R
index 39ce7b3..54618c8 100644
--- a/R/lod.R
+++ b/R/lod.R
@@ -10,7 +10,18 @@ lod.default <- function(object, ..., alpha = 0.05, beta = 0.05)
 
 lod.lm <- function(object, ..., alpha = 0.05, beta = 0.05)
 {
+  if (length(object$weights) > 0) {
+    stop(paste(
+      "\nThe determination of a lod from calibration models obtained by",
+      "weighted linear regression requires confidence intervals for",
+      "predicted y values taking into account weights for the x values",
+      "from which the predictions are to be generated.",
+      "This is not supported by the internally used predict.lm method.",
+      sep = "\n"
+    ))
+  }
   xname <- names(object$model)[[2]]
+  yname <- names(object$model)[[1]]
   newdata <- data.frame(0)
   names(newdata) <- xname
   y0 <- predict(object, newdata, interval="prediction",
@@ -28,7 +39,9 @@ lod.lm <- function(object, ..., alpha = 0.05, beta = 0.05)
   }
   lod.x <- optimize(f,interval=c(0,max(object$model[[xname]])))$minimum
   newdata <- data.frame(x = lod.x)
-  names(lod.x) <- xname
-  lod.y <- predict(object, newdata = lod.x)
-  return(list(x = lod.x, y = lod.y))
+  names(newdata) <- xname
+  lod.y <- predict(object, newdata)
+  lod <- list(lod.x, lod.y)
+  names(lod) <- c(xname, yname)
+  return(lod)
 }
diff --git a/R/loq.R b/R/loq.R
index c493a64..ee22d38 100644
--- a/R/loq.R
+++ b/R/loq.R
@@ -10,11 +10,21 @@ loq.default <- function(object, ..., alpha = 0.05, k = 3, n = 1, w = "auto")
 
 loq.lm <- function(object, ..., alpha = 0.05, k = 3, n = 1, w = "auto")
 {
+  xname <- names(object$model)[[2]]
+  yname <- names(object$model)[[1]]
   f <- function(x) {
-    y <- predict(object, data.frame(x = x))
+    newdata <- data.frame(x = x)
+    names(newdata) <- xname
+    y <- predict(object, newdata)
     p <- inverse.predict(object, rep(y, n), ws = w, alpha = alpha)
     (p[["Prediction"]] - k * p[["Confidence"]])^2
   }
-  tmp <- optimize(f,interval=c(0,max(object$model$x)))
-  return(tmp$minimum)
+  tmp <- optimize(f,interval=c(0,max(object$model[[2]])))
+  loq.x <- tmp$minimum
+  newdata <- data.frame(x = loq.x)
+  names(newdata) <- xname
+  loq.y <- predict(object, newdata)
+  loq <- list(loq.x, loq.y)
+  names(loq) <- c(xname, yname)
+  return(loq)
 }
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..1fc44a7
--- /dev/null
+++ b/TODO
@@ -0,0 +1,5 @@
+- lod(): At the moment, it is not possible to calculate an lod for the
+  case of more than one replicate (m is fixed to 1). The formula
+  for the prediction of y from mean(x) in Massart et al., p. 433,
+  could be used to generalize this.
+- Write methods for nonlinear calibration functions
diff --git a/inst/doc/chemCal-001.pdf b/inst/doc/chemCal-001.pdf
index 748b5e1..9211cf1 100644
--- a/inst/doc/chemCal-001.pdf
+++ b/inst/doc/chemCal-001.pdf
@@ -2,8 +2,8 @@
 %âãÏÓ\r
 1 0 obj
 <<
-/CreationDate (D:20060516214219)
-/ModDate (D:20060516214219)
+/CreationDate (D:20060517131400)
+/ModDate (D:20060517131400)
 /Title (R Graphics Output)
 /Producer (R 2.3.0)
 /Creator (R)
diff --git a/inst/doc/chemCal.Rnw b/inst/doc/chemCal.Rnw
index 26b224f..77888b4 100644
--- a/inst/doc/chemCal.Rnw
+++ b/inst/doc/chemCal.Rnw
@@ -19,7 +19,7 @@
 Ron Wehrens led to the belief that it could be heavily improved if the
 inverse prediction method given in \cite{massart97} would be
 implemented, since it also covers the case of weighted regression.
-At the moment, the package only consists of two functions, working
+At the moment, the package consists of three functions, working
 on univariate linear models of class \texttt{lm} or \texttt{rlm}.
 
 When calibrating an analytical method, the first task is to generate
@@ -27,6 +27,10 @@ a suitable model. If we want to use the \chemCal{} functions, we
 will have to restrict ourselves to univariate, possibly weighted, linear
 regression so far.
 
+For the weighted case, the function \code{predict.lm} had to be
+rewritten in order to allow for weights for the x values used to
+predict the y values.
+
 Once such a model has been created, the calibration can be graphically
 shown by using the \texttt{calplot} function:
 
@@ -34,12 +38,7 @@ shown by using the \texttt{calplot} function:
 library(chemCal)
 data(massart97ex3)
 attach(massart97ex3)
-yx <- split(y,factor(x))
-ybar <- sapply(yx,mean)
-s <- round(sapply(yx,sd),digits=2)
-w <- round(1/(s^2),digits=3)
-weights <- w[factor(x)]
-m <- lm(y ~ x,w=weights)
+m <- lm(y ~ x, w = rep(0.01,length(x)))
 calplot(m)
 @
diff --git a/inst/doc/chemCal.log b/inst/doc/chemCal.log
index d399d73..86ecc8e 100644
--- a/inst/doc/chemCal.log
+++ b/inst/doc/chemCal.log
@@ -1,4 +1,4 @@
-This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=pdflatex 2006.4.5) 16 MAY 2006 21:42
+This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=pdflatex 2006.4.5) 17 MAY 2006 13:14
 entering extended mode
 **chemCal.tex
 (./chemCal.tex
@@ -346,4 +346,4 @@ type1/bluesky/cm/cmbx12.pfb>
-Output written on chemCal.pdf (3 pages, 107306 bytes).
+Output written on chemCal.pdf (3 pages, 107447 bytes).
diff --git a/inst/doc/chemCal.pdf b/inst/doc/chemCal.pdf
index 6036bd3..5c6b1ea 100644
Binary files a/inst/doc/chemCal.pdf and b/inst/doc/chemCal.pdf differ
diff --git a/man/calplot.lm.Rd b/man/calplot.lm.Rd
index 6d3f52d..734933d 100644
--- a/man/calplot.lm.Rd
+++ b/man/calplot.lm.Rd
@@ -39,13 +39,15 @@
 }
 \examples{
 # Example of a Calibration plot for a weighted regression
+source("/home/ranke/tmp/r-base-2.3.0/src/library/stats/R/lm.R")
 data(massart97ex3)
 attach(massart97ex3)
 yx <- split(y,factor(x))
 s <- round(sapply(yx,sd),digits=2)
 w <- round(1/(s^2),digits=3)
 weights <- w[factor(x)]
-m <- lm(y ~ x,w=weights)
+m <- lm(y ~ x,w=10 * weights)
+calplot(m)
 calplot(m)
 }
 \author{
diff --git a/man/din32645.Rd b/man/din32645.Rd
index d251b7c..94486c4 100644
--- a/man/din32645.Rd
+++ b/man/din32645.Rd
@@ -14,18 +14,33 @@ data(din32645)
 m <- lm(y ~ x, data=din32645)
 calplot(m)
 (prediction <- inverse.predict(m,3500,alpha=0.01))
-# This should give 0.074 according to DIN (cited from the Dintest test data)
-round(prediction$Confidence,3)
+# This should give 0.07434 according to the Dintest test data, as
+# collected from Procontrol 3.1 (isomehr GmbH)
+round(prediction$Confidence,5)
 
-# According to Dintest, we should get 0.07, but we get 0.0759
-lod(m, alpha = 0.01)
+# According to the Dintest test data, we should get 0.0698 for the critical
+# value (decision limit, "Nachweisgrenze")
+(lod <- lod(m, alpha = 0.01, beta = 0.5))
+round(lod$x,4)
 
-# In German, there is the "Erfassungsgrenze", with k = 2,
-# and we should get 0.14 according to Dintest
-lod(m, k = 2, alpha = 0.01)
+# In German, the smallest detectable value is the "Erfassungsgrenze", and we
+# should get 0.140 according to the Dintest test data, but with chemCal we
+# can't reproduce this,
+lod(m, alpha = 0.01, beta = 0.01)
+# except by using an equivalent of the approximation
+# xD = 2 * Sc / A (Currie 1999, p. 118, or Orange Book, Chapter 18.4.3.7)
+lod.approx <- 2 * lod$x
+round(lod.approx, digits=3)
+# which seems to be the pragmatic definition in DIN 32645, judging from
+# the Dintest test data.
 
-# According to Dintest, we should get 0.21, we get 0.212
-loq(m, alpha = 0.01)
+# This agrees with the Dintest test data again, except for the last digit
+# of the value cited for Procontrol 3.1 (0.2121)
+(loq <- loq(m, alpha = 0.01))
+round(loq$x,4)
+# A similar value is obtained using the approximation
+# LQ = 3.04 * LC (Currie 1999, p. 120)
+3.04 * lod(m,alpha = 0.01, beta = 0.5)$x
 }
 \references{
   DIN 32645 (equivalent to ISO 11843)
@@ -33,5 +48,9 @@ loq(m, alpha = 0.01)
   Dintest. Plugin for MS Excel for evaluations of calibration data. Written by
   Georg Schmitt, University of Heidelberg.
   \url{http://www.rzuser.uni-heidelberg.de/~df6/download/dintest.htm}
+
+  Currie, L. A. (1999) Nomenclature in evaluation of analytical methods including
+    detection and quantification capabilities (IUPAC Recommendations 1995).
+    Analytica Chimica Acta 391, 105 - 126.
 }
 \keyword{datasets}
diff --git a/man/lod.Rd b/man/lod.Rd
index 15f9603..fa8c8ad 100644
--- a/man/lod.Rd
+++ b/man/lod.Rd
@@ -38,18 +38,35 @@
 the analyte is present (type II or false negative error), is beta
 (also a one-sided significance test).
 }
+\note{
+  - The default values for alpha and beta are recommended by IUPAC.
+  - The estimation of the LOD in terms of the analyte amount/concentration
+    xD from the LOD in the signal domain SD is done by simply inverting the
+    calibration function (i.e. assuming a known calibration function).
+}
 \references{
+  Massart, D.L., Vandeginste, B.G.M., Buydens, L.M.C., De Jong, S., Lewi, P.J.,
+  Smeyers-Verbeke, J. (1997) Handbook of Chemometrics and Qualimetrics: Part A,
+  Chapter 13.7.8
+
   J. Inczedy, T. Lengyel, and A.M. Ure (2002) International Union of Pure and
   Applied Chemistry Compendium of Analytical Nomenclature: Definitive Rules.
   Web edition.
+
+  Currie, L. A. (1999) Nomenclature in evaluation of analytical methods including
+    detection and quantification capabilities (IUPAC Recommendations 1995).
+    Analytica Chimica Acta 391, 105 - 126.
 }
 \examples{
 data(din32645)
 m <- lm(y ~ x, data = din32645)
-  # The decision limit (critical value) is obtained by using beta = 0.5:
-  lod(m, alpha = 0.01, beta = 0.5) # approx. Nachweisgrenze in Dintest 2002
-  lod(m, alpha = 0.01, beta = 0.01)
-  # In the latter case (Erfassungsgrenze), we get a slight deviation from
-  # Dintest 2002 test data.
+  lod(m)
+
+  # The critical value (decision limit, German Nachweisgrenze) can be obtained
+  # by using beta = 0.5:
+  lod(m, alpha = 0.01, beta = 0.5)
+  # or approximated by
+  2 * lod(m, alpha = 0.01, beta = 0.5)$x
+  # for the case of known, constant variance (homoscedastic data)
 }
 \keyword{manip}
diff --git a/man/loq.Rd b/man/loq.Rd
index 1030399..4850487 100644
--- a/man/loq.Rd
+++ b/man/loq.Rd
@@ -49,6 +49,11 @@
 limit of detection is the x value, where the relative error of the
 quantification with the given calibration model is 1/k.
 }
+\note{
+  IUPAC recommends basing the LOQ on the standard deviation of the
+  signal where x = 0. The approach taken here is, to my knowledge,
+  original to the chemCal package.
+}
 \examples{
 data(massart97ex3)
 attach(massart97ex3)
@@ -68,9 +73,9 @@
 mwy <- lm(y ~ x, w = 1/y)
 
 # Let's do this with one iteration only
-loq(mwy, w = 1 / predict(mwy,list(x = loq(mwy))))
+loq(mwy, w = 1 / predict(mwy,list(x = loq(mwy)$x)))
 # We can get better by doing replicate measurements
-loq(mwy, n = 3, w = 1 / predict(mwy,list(x = loq(mwy))))
+loq(mwy, n = 3, w = 1 / predict(mwy,list(x = loq(mwy)$x)))
 }
 \keyword{manip}
diff --git a/tests/din32645.R b/tests/din32645.R
new file mode 100644
index 0000000..dc0aee6
--- /dev/null
+++ b/tests/din32645.R
@@ -0,0 +1,7 @@
+library(chemCal)
+data(din32645)
+m <- lm(y ~ x, data=din32645)
+inverse.predict(m,3500,alpha=0.01)
+lod <- lod(m, alpha = 0.01, beta = 0.5)
+lod(m, alpha = 0.01, beta = 0.01)
+loq <- loq(m, alpha = 0.01)
diff --git a/tests/din32645.Rout.save b/tests/din32645.Rout.save
new file mode 100644
index 0000000..10cd1ab
--- /dev/null
+++ b/tests/din32645.Rout.save
@@ -0,0 +1,43 @@
+
+R : Copyright 2006, The R Foundation for Statistical Computing
+Version 2.3.0 (2006-04-24)
+ISBN 3-900051-07-0
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(chemCal)
+> data(din32645)
+> m <- lm(y ~ x, data=din32645)
+> inverse.predict(m,3500,alpha=0.01)
+$Prediction
+[1] 0.1054792
+
+$`Standard Error`
+[1] 0.02215619
+
+$Confidence
+[1] 0.07434261
+
+$`Confidence Limits`
+[1] 0.03113656 0.17982178
+
+> lod <- lod(m, alpha = 0.01, beta = 0.5)
+> lod(m, alpha = 0.01, beta = 0.01)
+$x
+[1] 0.132904
+
+$y
+[1] 3764.977
+
+> loq <- loq(m, alpha = 0.01)
+>
diff --git a/tests/massart97.R b/tests/massart97.R
new file mode 100644
index 0000000..7170ec4
--- /dev/null
+++ b/tests/massart97.R
@@ -0,0 +1,12 @@
+library(chemCal)
+data(massart97ex3)
+attach(massart97ex3)
+yx <- split(y,x)
+ybar <- sapply(yx,mean)
+s <- round(sapply(yx,sd),digits=2)
+w <- round(1/(s^2),digits=3)
+weights <- w[factor(x)]
+m <- lm(y ~ x,w=weights)
+# The following agrees with the book
+inverse.predict(m, 15, ws = 1.67)
+inverse.predict(m, 90, ws = 0.145)
diff --git a/tests/massart97.Rout.save b/tests/massart97.Rout.save
new file mode 100644
index 0000000..ae50275
--- /dev/null
+++ b/tests/massart97.Rout.save
@@ -0,0 +1,54 @@
+
+R : Copyright 2006, The R Foundation for Statistical Computing
+Version 2.3.0 (2006-04-24)
+ISBN 3-900051-07-0
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(chemCal)
+> data(massart97ex3)
+> attach(massart97ex3)
+> yx <- split(y,x)
+> ybar <- sapply(yx,mean)
+> s <- round(sapply(yx,sd),digits=2)
+> w <- round(1/(s^2),digits=3)
+> weights <- w[factor(x)]
+> m <- lm(y ~ x,w=weights)
+> # The following agrees with the book
+> inverse.predict(m, 15, ws = 1.67)
+$Prediction
+[1] 5.865367
+
+$`Standard Error`
+[1] 0.892611
+
+$Confidence
+[1] 2.478285
+
+$`Confidence Limits`
+[1] 3.387082 8.343652
+
+> inverse.predict(m, 90, ws = 0.145)
+$Prediction
+[1] 44.06025
+
+$`Standard Error`
+[1] 2.829162
+
+$Confidence
+[1] 7.855012
+
+$`Confidence Limits`
+[1] 36.20523 51.91526
+
+>
-- 
cgit v1.2.1
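The R sketch below is not part of the patch; it is only meant to show the new behaviour end to end. It assumes a chemCal build that already contains this revision, uses the massart97ex3 data set shipped with the package, and the object names mw and m0 are arbitrary. After this change, calplot() and lod() stop with the error messages added above as soon as they are handed a model fitted by weighted regression, while unweighted models are handled as before.

# Sketch only: assumes chemCal as of this revision is installed
library(chemCal)
data(massart97ex3)
attach(massart97ex3)

# Weighted fit, constructed as in the package documentation
yx <- split(y, factor(x))
s <- sapply(yx, sd)
w <- 1 / s^2
weights <- w[factor(x)]
mw <- lm(y ~ x, weights = weights)

# With this patch, both calls stop with an informative error for
# weighted models, so they are wrapped in try() here
try(calplot(mw))
try(lod(mw))

# An unweighted fit is still accepted by both functions
m0 <- lm(y ~ x)
calplot(m0)
lod(m0, alpha = 0.01, beta = 0.5)

detach(massart97ex3)

inverse.predict() is unaffected by the change and still accepts weighted models together with explicit ws weights, as the massart97 test file above demonstrates.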