R/inverse.predict.lm.R


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163

#' Predict x from y for a linear calibration
#' 
#' This function predicts x values using a univariate linear model that has
#' been generated for the purpose of calibrating a measurement method.
#' Prediction intervals are given at the specified confidence level.  The
#' calculation method was taken from Massart et al. (1997). In particular,
#' Equations 8.26 and 8.28 were combined in order to yield a general treatment
#' of inverse prediction for univariate linear models, taking into account
#' weights that have been used to create the linear model, and at the same time
#' providing the possibility to specify a precision in sample measurements
#' differing from the precision in standard samples used for the calibration.
#' This is elaborated in the package vignette.
#'
#' This is an implementation of Equation (8.28) in the Handbook of Chemometrics
#' and Qualimetrics, Part A, Massart et al (1997), page 200, validated with
#' Example 8 on the same page, extended as specified in the package vignette
#' 
#' @aliases inverse.predict inverse.predict.lm inverse.predict.rlm
#' inverse.predict.default
#' @param object A univariate model object of class \code{\link{lm}} or
#' \code{\link[MASS:rlm]{rlm}} with model formula \code{y ~ x} or \code{y ~ x -
#' 1}.
#' @param newdata A vector of observed y values for one sample.
#' @param \dots Placeholder for further arguments that might be needed by
#' future implementations.
#' @param ws The weight attributed to the sample. This argument is obligatory
#' if \code{object} has weights.
#' @param alpha The error tolerance level for the confidence interval to be
#' reported.
#' @param var.s The estimated variance of the sample measurements. The default
#' is to take the residual standard error from the calibration and to adjust it
#' using \code{ws}, if applicable. This means that \code{var.s} overrides
#' \code{ws}.
#' @return A list containing the predicted x value, its standard error and a
#' confidence interval.
#' @note The function was validated with examples 7 and 8 from Massart et al.
#' (1997).  Note that the behaviour of inverse.predict changed with chemCal
#' version 0.2.1. Confidence intervals for x values obtained from calibrations
#' with replicate measurements did not take the variation about the means into
#' account.  Please refer to the vignette for details.
#' @references Massart, L.M, Vandenginste, B.G.M., Buydens, L.M.C., De Jong,
#' S., Lewi, P.J., Smeyers-Verbeke, J. (1997) Handbook of Chemometrics and
#' Qualimetrics: Part A, p. 200
#' @importFrom stats optimize predict qt
#' @examples
#' 
#' # This is example 7 from Chapter 8 in Massart et al. (1997)
#' m <- lm(y ~ x, data = massart97ex1)
#' inverse.predict(m, 15)        #  6.1 +- 4.9
#' inverse.predict(m, 90)        # 43.9 +- 4.9
#' inverse.predict(m, rep(90,5)) # 43.9 +- 3.2
#' 
#' # For reproducing the results for replicate standard measurements in example 8,
#' # we need to do the calibration on the means when using chemCal > 0.2
#' weights <- with(massart97ex3, {
#'   yx <- split(y, x)
#'   ybar <- sapply(yx, mean)
#'   s <- round(sapply(yx, sd), digits = 2)
#'   w <- round(1 / (s^2), digits = 3)
#' })
#' 
#' massart97ex3.means <- aggregate(y ~ x, massart97ex3, mean)
#' 
#' m3.means <- lm(y ~ x, w = weights, data = massart97ex3.means)
#' 
#' inverse.predict(m3.means, 15, ws = 1.67)  # 5.9 +- 2.5
#' inverse.predict(m3.means, 90, ws = 0.145) # 44.1 +- 7.9
#' 
#' 
#' @export
inverse.predict <- function(object, newdata, ...,
  ws = "auto", alpha = 0.05, var.s = "auto")
{
  UseMethod("inverse.predict")
}

#' @export
inverse.predict.default <- function(object, newdata, ...,
  ws = "auto", alpha = 0.05, var.s = "auto")
{
  stop("Inverse prediction only implemented for univariate lm objects.")
}

#' @export
inverse.predict.lm <- function(object, newdata, ...,
  ws = "auto", alpha = 0.05, var.s = "auto")
{
  yname = names(object$model)[[1]]
  xname = names(object$model)[[2]]
  if (ws == "auto") {
    ws <- ifelse(length(object$weights) > 0, mean(object$weights), 1)
  }
  if (length(object$weights) > 0) {
    w <- object$weights
  } else {
    w <- rep(1, nrow(object$model))
  }
  .inverse.predict(object = object, newdata = newdata, 
    ws = ws, alpha = alpha, var.s = var.s, w = w, xname = xname, yname = yname)
}

#' @export
inverse.predict.rlm <- function(object, newdata, ...,
  ws = "auto", alpha = 0.05, var.s = "auto")
{
  yname = names(object$model)[[1]]
  xname = names(object$model)[[2]]
  if (ws == "auto") {
    ws <- mean(object$w)
  }
  w <- object$w
  .inverse.predict(object = object, newdata = newdata, 
    ws = ws, alpha = alpha, var.s = var.s, w = w, xname = xname, yname = yname)
}

.inverse.predict <- function(object, newdata, ws, alpha, var.s, w, xname, yname)
{
  if (length(object$coef) > 2)
    stop("More than one independent variable in your model - not implemented")

  if (alpha <= 0 | alpha >= 1)
    stop("Alpha should be between 0 and 1 (exclusive)")

  ybars <- mean(newdata)
  m <- length(newdata)

  n <- nrow(object$model)
  df <- n - length(object$coef)
  xi <- object$model[[2]]
  yi <- object$model[[1]]
  yihat <- object$fitted.values

  se <- sqrt(sum(w * (yi - yihat)^2)/df)

  if (var.s == "auto") {
    var.s <- se^2/ws
  }

  b1 <- object$coef[[xname]]

  ybarw <- sum(w * yi)/sum(w)

# This is the adapted form of equation 8.28 (see package vignette)
  sxhats <- 1/b1 * sqrt(
    (var.s / m) + 
    se^2 * (1/sum(w) + 
      (ybars - ybarw)^2 * sum(w) /
        (b1^2 * (sum(w) * sum(w * xi^2) - sum(w * xi)^2)))
  )

  if (names(object$coef)[1] == "(Intercept)") {
    b0 <- object$coef[["(Intercept)"]]
  } else {
    b0 <- 0
  }

  xs <- (ybars - b0) / b1
  t <- qt(1-0.5*alpha, n - 2)
  conf <- t * sxhats
  result <- list("Prediction"=xs,"Standard Error"=sxhats,
    "Confidence"=conf, "Confidence Limits"=c(xs - conf, xs + conf))
  return(result)
}