aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
authorJohannes Ranke <jranke@uni-bremen.de>2020-05-09 21:18:42 +0200
committerJohannes Ranke <jranke@uni-bremen.de>2020-05-09 21:18:42 +0200
commitefab37957381919c21d874906ce870f4941c760a (patch)
treed485fa148ec1513a0c0810780a1ed10c4f9097d2 /R
parent47ef00e3d0a961f8fbecf0bd5da0283bed21bb96 (diff)
Avoid the call to merge for analytical solutions
This increases performance up to a factor of five!
Diffstat (limited to 'R')
-rw-r--r--R/create_deg_func.R32
-rw-r--r--R/mkinerrmin.R28
-rw-r--r--R/mkinfit.R75
-rw-r--r--R/mkinmod.R2
-rw-r--r--R/mkinpredict.R17
-rw-r--r--R/plot.mkinfit.R1
6 files changed, 83 insertions, 72 deletions
diff --git a/R/create_deg_func.R b/R/create_deg_func.R
index fb419a98..886c5e8b 100644
--- a/R/create_deg_func.R
+++ b/R/create_deg_func.R
@@ -21,19 +21,17 @@
create_deg_func <- function(spec, use_of_ff = c("min", "max")) {
use_of_ff <- match.arg(use_of_ff)
-
min_ff <- use_of_ff == "min"
-
obs_vars <- names(spec)
- n <- character(0)
-
parent <- obs_vars[1]
parent_type <- spec[[1]]$type
supported <- TRUE # This may be modified below
- n[1] <- paste0(parent, " = ", parent_type, ".solution(outtimes, odeini['", parent,
+ predicted_text <- character(0)
+
+ predicted_text[parent] <- paste0(parent_type, ".solution(t, odeini['", parent,
if (parent_type == "SFORB") "_free", "'], ",
switch(parent_type,
SFO = paste0("k_", parent, if (min_ff) "_sink" else "", ")"),
@@ -59,25 +57,27 @@ create_deg_func <- function(spec, use_of_ff = c("min", "max")) {
k2 <- paste0("k_", n2)
f12 <- paste0("f_", n1, "_to_", n2)
if (parent_type == "SFO") {
- n[2] <- paste0(n2, " = (((", k2, "-", k1, ")*", n20, "-", f12, "*", k1, "*", n10, ")*exp(-", k2, "*outtimes)+",
- f12, "*", k1, "*", n10, "*exp(-", k1, "*outtimes))/(", k2, "-", k1, ")")
+ predicted_text[n2] <- paste0(
+ "(((", k2, "-", k1, ")*", n20, "-", f12, "*", k1, "*", n10, ")*exp(-", k2, "*t)+",
+ f12, "*", k1, "*", n10, "*exp(-", k1, "*t))/(", k2, "-", k1, ")")
}
}
}
if (supported) {
- all_n <- paste(n, collapse = ",\n")
+ deg_func <- function(observed, odeini, odeparms) {}
f_body <- paste0("{\n",
- "out <- with(as.list(odeparms), {\n",
- "data.frame(\n",
- "time = outtimes,\n",
- all_n, ")\n",
+ "predicted <- numeric(0)\n",
+ "with(as.list(odeparms), {\n")
+ for (obs_var in obs_vars) {
+ f_body <- paste0(f_body,
+ "t <- observed[observed$name == '", obs_var, "', 'time']\n",
+ "predicted <<- c(predicted, ", predicted_text[obs_var], ")\n")
+ }
+ f_body <- paste0(f_body,
"})\n",
- "return(out)\n}\n"
- )
-
- deg_func <- function(odeini, odeparms, outtimes) {}
+ "return(predicted)\n}\n")
body(deg_func) <- parse(text = f_body)
return(deg_func)
diff --git a/R/mkinerrmin.R b/R/mkinerrmin.R
index 0b647b81..1994aed0 100644
--- a/R/mkinerrmin.R
+++ b/R/mkinerrmin.R
@@ -1,12 +1,12 @@
if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "value_mean"))
#' Calculate the minimum error to assume in order to pass the variance test
-#'
+#'
#' This function finds the smallest relative error still resulting in passing
#' the chi-squared test as defined in the FOCUS kinetics report from 2006.
-#'
+#'
#' This function is used internally by \code{\link{summary.mkinfit}}.
-#'
+#'
#' @param fit an object of class \code{\link{mkinfit}}.
#' @param alpha The confidence level chosen for the chi-squared test.
#' @importFrom stats qchisq aggregate
@@ -25,43 +25,41 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "value_mean"))
#' \url{http://esdac.jrc.ec.europa.eu/projects/degradation-kinetics}
#' @keywords manip
#' @examples
-#'
+#'
#' SFO_SFO = mkinmod(parent = mkinsub("SFO", to = "m1"),
#' m1 = mkinsub("SFO"),
#' use_of_ff = "max")
-#'
+#'
#' fit_FOCUS_D = mkinfit(SFO_SFO, FOCUS_2006_D, quiet = TRUE)
#' round(mkinerrmin(fit_FOCUS_D), 4)
#' \dontrun{
#' fit_FOCUS_E = mkinfit(SFO_SFO, FOCUS_2006_E, quiet = TRUE)
#' round(mkinerrmin(fit_FOCUS_E), 4)
#' }
-#'
+#'
#' @export
mkinerrmin <- function(fit, alpha = 0.05)
{
parms.optim <- fit$par
kinerrmin <- function(errdata, n.parms) {
- means.mean <- mean(errdata$value_mean, na.rm = TRUE)
- df = length(errdata$value_mean) - n.parms
+ means.mean <- mean(errdata$observed, na.rm = TRUE)
+ df = nrow(errdata) - n.parms
err.min <- sqrt((1 / qchisq(1 - alpha, df)) *
- sum((errdata$value_mean - errdata$value_pred)^2)/(means.mean^2))
+ sum((errdata$observed - errdata$predicted)^2)/(means.mean^2))
return(list(err.min = err.min, n.optim = n.parms, df = df))
}
- means <- aggregate(value ~ time + name, data = fit$observed, mean, na.rm=TRUE)
- errdata <- merge(means, fit$predicted, by = c("time", "name"),
- suffixes = c("_mean", "_pred"))
- errdata <- errdata[order(errdata$time, errdata$name), ]
+ errdata <- aggregate(cbind(observed, predicted) ~ time + variable, data = fit$data, mean, na.rm=TRUE)
+ errdata <- errdata[order(errdata$time, errdata$variable), ]
# Remove values at time zero for variables whose value for state.ini is fixed,
# as these will not have any effect in the optimization and should therefore not
# be counted as degrees of freedom.
fixed_initials = gsub("_0$", "", rownames(subset(fit$fixed, type = "state")))
- errdata <- subset(errdata, !(time == 0 & name %in% fixed_initials))
+ errdata <- subset(errdata, !(time == 0 & variable %in% fixed_initials))
n.optim.overall <- length(parms.optim) - length(fit$errparms)
@@ -73,7 +71,7 @@ mkinerrmin <- function(fit, alpha = 0.05)
# The degrees of freedom are counted according to FOCUS kinetics (2011, p. 164)
for (obs_var in fit$obs_vars)
{
- errdata.var <- subset(errdata, name == obs_var)
+ errdata.var <- subset(errdata, variable == obs_var)
# Check if initial value is optimised
n.initials.optim <- length(grep(paste(obs_var, ".*", "_0", sep=""), names(parms.optim)))
diff --git a/R/mkinfit.R b/R/mkinfit.R
index 6cfb88c0..c33afbf0 100644
--- a/R/mkinfit.R
+++ b/R/mkinfit.R
@@ -288,6 +288,10 @@ mkinfit <- function(mkinmod, observed,
observed <- subset(observed, value != 0)
}
+ # Sort observed values for efficient analytical predictions
+ observed$name <- ordered(observed$name, levels = obs_vars)
+ observed <- observed[order(observed$name, observed$time), ]
+
# Obtain data for decline from maximum mean value if requested
if (from_max_mean) {
# This is only used for simple decline models
@@ -520,7 +524,7 @@ mkinfit <- function(mkinmod, observed,
}
# Unique outtimes for model solution.
- outtimes = sort(unique(observed$time))
+ outtimes <- sort(unique(observed$time))
# Define the objective function for optimisation, including (back)transformations
cost_function <- function(P, trans = TRUE, OLS = FALSE, fixed_degparms = FALSE, fixed_errparms = FALSE, update_data = TRUE, ...)
@@ -578,47 +582,51 @@ mkinfit <- function(mkinmod, observed,
}
# Solve the system with current parameter values
- out <- mkinpredict(mkinmod, parms,
- odeini, outtimes,
- solution_type = solution_type,
- use_compiled = use_compiled,
- method.ode = method.ode,
- atol = atol, rtol = rtol, ...)
-
- out_long <- mkin_wide_to_long(out, time = "time")
-
- # Surprisingly, the next line is the one taking the most time for one, two
- # or three observed variables if we use compiled ODE models
- # as evidenced by use of github:hadley/lineprof
- cost_data <- merge(observed[c("name", "time", "value")], out_long,
- by = c("name", "time"), suffixes = c(".observed", ".predicted"))
+ if (solution_type == "analytical") {
+ observed$predicted <- mkinmod$deg_func(observed, odeini, parms)
+ } else {
+ out <- mkinpredict(mkinmod, parms,
+ odeini, outtimes,
+ solution_type = solution_type,
+ use_compiled = use_compiled,
+ method.ode = method.ode,
+ atol = atol, rtol = rtol, ...)
+
+ out_long <- mkin_wide_to_long(out, time = "time")
+
+ out_merged <- merge(observed[c("name", "time")], out_long)
+ out_merged$name <- ordered(out_merged$name, levels = obs_vars)
+ out_merged <- out_merged[order(out_merged$name, out_merged$time), ]
+ observed$predicted <- out_merged$value
+ }
+ # Define standard deviation for each observation
if (err_mod == "const") {
- cost_data$std <- if (OLS) NA else cost_errparms["sigma"]
+ observed$std <- if (OLS) NA else cost_errparms["sigma"]
}
if (err_mod == "obs") {
- std_names <- paste0("sigma_", cost_data$name)
- cost_data$std <- cost_errparms[std_names]
+ std_names <- paste0("sigma_", observed$name)
+ observed$std <- cost_errparms[std_names]
}
if (err_mod == "tc") {
- cost_data$std <- sqrt(cost_errparms["sigma_low"]^2 + cost_data$value.predicted^2 * cost_errparms["rsd_high"]^2)
+ observed$std <- sqrt(cost_errparms["sigma_low"]^2 + observed$predicted^2 * cost_errparms["rsd_high"]^2)
}
+ # Calculate model cost
if (OLS) {
# Cost is the sum of squared residuals
- cost <- with(cost_data, sum((value.observed - value.predicted)^2))
+ cost <- with(observed, sum((value - predicted)^2))
} else {
# Cost is the negative log-likelihood
- cost <- - with(cost_data,
- sum(dnorm(x = value.observed, mean = value.predicted, sd = std, log = TRUE)))
+ cost <- - with(observed,
+ sum(dnorm(x = value, mean = predicted, sd = std, log = TRUE)))
}
# We update the current cost and data during the optimisation, not
# during hessian calculations
if (update_data) {
- assign("out_predicted", out_long, inherits = TRUE)
- assign("current_data", cost_data, inherits = TRUE)
+ assign("current_data", observed, inherits = TRUE)
if (cost < cost.current) {
assign("cost.current", cost, inherits = TRUE)
@@ -718,7 +726,7 @@ mkinfit <- function(mkinmod, observed,
degparms <- fit$par
# Get the maximum likelihood estimate for sigma at the optimum parameter values
- current_data$residual <- current_data$value.observed - current_data$value.predicted
+ current_data$residual <- current_data$value - current_data$predicted
sigma_mle <- sqrt(sum(current_data$residual^2)/nrow(current_data))
# Use that estimate for the constant variance, or as first guess if err_mod = "obs"
@@ -855,11 +863,7 @@ mkinfit <- function(mkinmod, observed,
# We also need the model and a model name for summary and plotting
fit$mkinmod <- mkinmod
fit$mkinmod$name <- mkinmod_name
-
- # We need data and predictions for summary and plotting
- fit$observed <- observed
fit$obs_vars <- obs_vars
- fit$predicted <- out_predicted
# Residual sum of squares as a function of the fitted parameters
fit$rss <- function(P) cost_function(P, OLS = TRUE, update_data = FALSE)
@@ -886,15 +890,10 @@ mkinfit <- function(mkinmod, observed,
fit$fixed$type = c(rep("state", length(state.ini.fixed)),
rep("deparm", length(parms.fixed)))
- # Sort observed, predicted and residuals
- current_data$name <- ordered(current_data$name, levels = obs_vars)
-
- ordered_data <- current_data[order(current_data$name, current_data$time), ]
-
- fit$data <- data.frame(time = ordered_data$time,
- variable = ordered_data$name,
- observed = ordered_data$value.observed,
- predicted = ordered_data$value.predicted)
+ fit$data <- data.frame(time = current_data$time,
+ variable = current_data$name,
+ observed = current_data$value,
+ predicted = current_data$predicted)
fit$data$residual <- fit$data$observed - fit$data$predicted
diff --git a/R/mkinmod.R b/R/mkinmod.R
index 21551861..7e9e9248 100644
--- a/R/mkinmod.R
+++ b/R/mkinmod.R
@@ -70,7 +70,7 @@
#' @examples
#'
#' # Specify the SFO model (this is not needed any more, as we can now mkinfit("SFO", ...)
-#' SFO <- mkinmod(parent = list(type = "SFO"))
+#' SFO <- mkinmod(parent = mkinsub("SFO"))
#'
#' # One parent compound, one metabolite, both single first order
#' SFO_SFO <- mkinmod(
diff --git a/R/mkinpredict.R b/R/mkinpredict.R
index 8c76ca05..75582fac 100644
--- a/R/mkinpredict.R
+++ b/R/mkinpredict.R
@@ -92,10 +92,15 @@
#' c(parent = 100, m1 = 0), seq(0, 20, by = 0.1),
#' solution_type = "analytical", use_compiled = FALSE)[201,])
#' }
+#' analytical = mkinpredict(SFO_SFO,
+#' c(k_parent = 0.15, f_parent_to_m1 = 0.5, k_m1 = 0.01),
+#' c(parent = 100, m1 = 0), seq(0, 20, by = 0.1),
+#' solution_type = "analytical", use_compiled = FALSE)[201,]
#'
#' \dontrun{
#' # Predict from a fitted model
#' f <- mkinfit(SFO_SFO, FOCUS_2006_C, quiet = TRUE)
+#' f <- mkinfit(SFO_SFO, FOCUS_2006_C, quiet = TRUE, solution_type = "analytical")
#' head(mkinpredict(f))
#' }
#'
@@ -131,8 +136,16 @@ mkinpredict.mkinmod <- function(x,
}
if (solution_type == "analytical") {
- out <- x$deg_func(odeini = odeini,
- odeparms = odeparms, outtimes = outtimes)
+ # This is clumsy, as we wanted fast analytical predictions for mkinfit
+ out <- data.frame(time = outtimes)
+ obs_vars <- names(x$spec)
+ pseudo_observed <-
+ data.frame(name = rep(obs_vars, each = length(outtimes)),
+ time = rep(outtimes, length(obs_vars)))
+ pseudo_observed$predicted <- x$deg_func(pseudo_observed, odeini, odeparms)
+ for (obs_var in obs_vars) {
+ out[obs_var] <- pseudo_observed[pseudo_observed$name == obs_var, "predicted"]
+ }
}
if (solution_type == "eigen") {
diff --git a/R/plot.mkinfit.R b/R/plot.mkinfit.R
index 4b5bfa3a..b2cb4890 100644
--- a/R/plot.mkinfit.R
+++ b/R/plot.mkinfit.R
@@ -69,6 +69,7 @@ if(getRversion() >= '2.15.1') utils::globalVariables(c("type", "variable", "obse
#' \dontrun{
#' SFO_SFO <- mkinmod(parent = mkinsub("SFO", "m1", full = "Parent"),
#' m1 = mkinsub("SFO", full = "Metabolite M1" ))
+#' fit <- mkinfit(SFO_SFO, FOCUS_2006_D, quiet = TRUE)
#' fit <- mkinfit(SFO_SFO, FOCUS_2006_D, quiet = TRUE, error_model = "tc")
#' plot(fit)
#' plot_res(fit)

Contact - Imprint