#' Confidence intervals for parameters of mkinfit objects
#'
#' The default method 'quadratic' is based on the quadratic approximation of
#' the curvature of the likelihood function at the maximum likelihood parameter
#' estimates.
#' The alternative method 'profile' is based on the profile likelihood for each
#' parameter. The method uses two nested optimisations and can take a very long
#' time, even if parallelised by specifying 'cores' on unixoid platforms. The
#' speed of the method could likely be improved by using the method of Venzon
#' and Moolgavkar (1988).
#'
#' @param object An \code{\link{mkinfit}} object
#' @param parm A vector of names of the parameters which are to be given
#' confidence intervals. If missing, all parameters are considered.
#' @param level The confidence level required
#' @param alpha The allowed error probability, overrides 'level' if specified.
#' @param cutoff An alternative cutoff for the difference in the
#' log-likelihoods at the confidence boundary. Specifying an explicit cutoff
#' value overrides the arguments 'level' and 'alpha'.
#' @param method The 'quadratic' method approximates the likelihood function
#' at the optimised parameters by a second order Taylor expansion, using the
#' matrix of second derivatives (hessian) contained in the object.
#' The 'profile' method searches the parameter space for the
#' cutoff of the confidence intervals by means of a likelihood ratio test.
#' @param transformed If the quadratic approximation is used, should it be
#' applied to the likelihood based on the transformed parameters?
#' @param backtransform If we approximate the likelihood in terms of the
#' transformed parameters, should we backtransform the parameters with
#' their confidence intervals?
#' @param cores The number of cores to be used for multicore processing.
#' On Windows machines, cores > 1 is currently not supported.
#' @param quiet Should we suppress the message "Profiling the likelihood"?
#' @param \dots Not used
#' @return A matrix with columns giving lower and upper confidence limits for
#' each parameter.
#' @importFrom stats qt qchisq nlminb optimize
#' @importFrom parallel detectCores
#' @references
#' Bates DM and Watts DG (1988) Nonlinear regression analysis and its
#' applications. John Wiley & Sons.
#'
#' Pawitan Y (2013) In all likelihood - Statistical modelling and
#' inference using likelihood. Clarendon Press, Oxford.
#'
#' Venzon DJ and Moolgavkar SH (1988) A Method for Computing
#' Profile-Likelihood Based Confidence Intervals, Applied Statistics, 37,
#' 87–94.
#' @examples
#' f <- mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)
#' confint(f, method = "quadratic")
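#'
#' # The error probability can equivalently be given via 'alpha',
#' # e.g. for a 90% confidence interval
#' confint(f, method = "quadratic", alpha = 0.1)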
#'
#' \dontrun{
#' confint(f, method = "profile")
#'
#' # Set the number of cores for the profiling method for further examples
#' if (identical(Sys.getenv("NOT_CRAN"), "true")) {
#' n_cores <- parallel::detectCores() - 1
#' } else {
#' n_cores <- 1
#' }
#' if (Sys.getenv("TRAVIS") != "") n_cores = 1
#' if (Sys.info()["sysname"] == "Windows") n_cores = 1
#'
#' SFO_SFO <- mkinmod(parent = mkinsub("SFO", "m1"), m1 = mkinsub("SFO"), quiet = TRUE)
#' SFO_SFO.ff <- mkinmod(parent = mkinsub("SFO", "m1"), m1 = mkinsub("SFO"),
#' use_of_ff = "max", quiet = TRUE)
#' f_d_1 <- mkinfit(SFO_SFO, subset(FOCUS_2006_D, value != 0), quiet = TRUE)
#' system.time(ci_profile <- confint(f_d_1, method = "profile", cores = 1, quiet = TRUE))
#' # Using more cores does not save much time here, as parent_0 takes up most of the time
#' # If we additionally exclude parent_0 (the confidence of which is often of
#' # minor interest), we get a nice performance improvement from about 50
#' # seconds to about 12 seconds if we use at least four cores
#' system.time(ci_profile_no_parent_0 <- confint(f_d_1, method = "profile",
#' c("k_parent_sink", "k_parent_m1", "k_m1_sink", "sigma"), cores = n_cores))
#' ci_profile
#' ci_quadratic_transformed <- confint(f_d_1, method = "quadratic")
#' ci_quadratic_transformed
#' ci_quadratic_untransformed <- confint(f_d_1, method = "quadratic", transformed = FALSE)
#' ci_quadratic_untransformed
#' # Against the expectation based on Bates and Watts (1988), the confidence
#' # intervals based on the internal parameter transformation are less
#' # congruent with the likelihood based intervals. Note the superiority of the
#' # interval based on the untransformed fit for k_m1_sink
#' rel_diffs_transformed <- abs((ci_quadratic_transformed - ci_profile)/ci_profile)
#' rel_diffs_untransformed <- abs((ci_quadratic_untransformed - ci_profile)/ci_profile)
#' rel_diffs_transformed < rel_diffs_untransformed
#' signif(rel_diffs_transformed, 3)
#' signif(rel_diffs_untransformed, 3)
#'
#'
#' # Investigate a case with formation fractions
#' f_d_2 <- mkinfit(SFO_SFO.ff, subset(FOCUS_2006_D, value != 0), quiet = TRUE)
#' ci_profile_ff <- confint(f_d_2, method = "profile", cores = n_cores)
#' ci_profile_ff
#' ci_quadratic_transformed_ff <- confint(f_d_2, method = "quadratic")
#' ci_quadratic_transformed_ff
#' ci_quadratic_untransformed_ff <- confint(f_d_2, method = "quadratic", transformed = FALSE)
#' ci_quadratic_untransformed_ff
#' rel_diffs_transformed_ff <- abs((ci_quadratic_transformed_ff - ci_profile_ff)/ci_profile_ff)
#' rel_diffs_untransformed_ff <- abs((ci_quadratic_untransformed_ff - ci_profile_ff)/ci_profile_ff)
#' # While the confidence interval for the parent rate constant is closer to
#' # the profile based interval when using the internal parameter
#' # transformation, the interval for the metabolite rate constant is better
#' # without internal parameter transformation.
#' rel_diffs_transformed_ff < rel_diffs_untransformed_ff
#' rel_diffs_transformed_ff
#' rel_diffs_untransformed_ff
#'
#' # The profiling for the following fit does not finish in a reasonable time,
#' # therefore we use the quadratic approximation
#' m_synth_DFOP_par <- mkinmod(parent = mkinsub("DFOP", c("M1", "M2")),
#' M1 = mkinsub("SFO"),
#' M2 = mkinsub("SFO"),
#' use_of_ff = "max", quiet = TRUE)
#' DFOP_par_c <- synthetic_data_for_UBA_2014[[12]]$data
#' f_tc_2 <- mkinfit(m_synth_DFOP_par, DFOP_par_c, error_model = "tc",
#' error_model_algorithm = "direct", quiet = TRUE)
#' confint(f_tc_2, method = "quadratic")
#' confint(f_tc_2, "parent_0", method = "quadratic")
#' }
#' @export
confint.mkinfit <- function(object, parm,
level = 0.95, alpha = 1 - level, cutoff,
method = c("quadratic", "profile"),
transformed = TRUE, backtransform = TRUE,
cores = round(detectCores()/2), quiet = FALSE, ...)
{
tparms <- parms(object, transformed = TRUE)
bparms <- parms(object, transformed = FALSE)
tpnames <- names(tparms)
bpnames <- names(bparms)
return_pnames <- if (missing(parm)) {
if (backtransform) bpnames else tpnames
} else {
parm
}
p <- length(return_pnames)
method <- match.arg(method)
a <- c(alpha / 2, 1 - (alpha / 2))
if (method == "quadratic") {
quantiles <- qt(a, object$df.residual)
covar_pnames <- if (missing(parm)) {
if (transformed) tpnames else bpnames
} else {
parm
}
return_parms <- if (backtransform) bparms[return_pnames]
else tparms[return_pnames]
covar_parms <- if (transformed) tparms[covar_pnames]
else bparms[covar_pnames]
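    # The variance-covariance matrix of the estimates is obtained by inverting
    # the hessian stored in the fitted object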
if (transformed) {
covar <- try(solve(object$hessian), silent = TRUE)
} else {
covar <- try(solve(object$hessian_notrans), silent = TRUE)
}
    # If inverting the hessian failed or produced NA values
    if (!is.numeric(covar) || is.na(covar[1])) {
ses <- lci <- uci <- rep(NA, p)
} else {
ses <- sqrt(diag(covar))[covar_pnames]
lci <- covar_parms + quantiles[1] * ses
uci <- covar_parms + quantiles[2] * ses
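      # If the approximation was done in terms of the transformed parameters,
      # transform the limits of the ODE parameters back to the original scale;
      # limits of the error model parameters are kept as they are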
      if (transformed && backtransform) {
lci_back <- backtransform_odeparms(lci,
object$mkinmod, object$transform_rates, object$transform_fractions)
uci_back <- backtransform_odeparms(uci,
object$mkinmod, object$transform_rates, object$transform_fractions)
return_errparm_names <- intersect(names(object$errparms), return_pnames)
lci <- c(lci_back, lci[return_errparm_names])
uci <- c(uci_back, uci[return_errparm_names])
}
}
ci <- cbind(lower = lci, upper = uci)
}
if (method == "profile") {
if (!quiet) message("Profiling the likelihood")
lci <- uci <- rep(NA, p)
names(lci) <- names(uci) <- return_pnames
    profile_pnames <- if (missing(parm)) names(parms(object))
else parm
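    # The default cutoff corresponds to a likelihood ratio test with one
    # degree of freedom (about 1.92 for alpha = 0.05)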
if (missing(cutoff)) {
cutoff <- 0.5 * qchisq(1 - alpha, 1)
}
all_parms <- parms(object)
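    # For each parameter, the outer optimisation searches for the value at
    # which the drop in the log-likelihood equals the cutoff, while the inner
    # optimisation (nlminb) reoptimises all remaining parameters, yielding the
    # profile likelihood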
get_ci <- function(pname) {
pnames_free <- setdiff(names(all_parms), pname)
profile_ll <- function(x)
{
pll_cost <- function(P) {
parms_cost <- all_parms
parms_cost[pnames_free] <- P[pnames_free]
parms_cost[pname] <- x
- object$ll(parms_cost)
}
- nlminb(all_parms[pnames_free], pll_cost)$objective
}
cost <- function(x) {
(cutoff - (object$logLik - profile_ll(x)))^2
}
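      # Search below the estimate for the lower limit and above it for the
      # upper limit; formation fractions and g are bounded from above by 1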
lci_pname <- optimize(cost, lower = 0, upper = all_parms[pname])$minimum
uci_pname <- optimize(cost, lower = all_parms[pname],
upper = ifelse(grepl("^f_|^g$", pname), 1, 15 * all_parms[pname]))$minimum
return(c(lci_pname, uci_pname))
}
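    # Profile the requested parameters, in parallel if cores > 1 (uses forking
    # and is therefore not available on Windows)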
ci <- t(parallel::mcmapply(get_ci, profile_pnames, mc.cores = cores))
}
colnames(ci) <- paste0(
format(100 * a, trim = TRUE, scientific = FALSE, digits = 3), "%")
return(ci)
}