From 636dade692b8eee012004a2740616385333efc48 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Fri, 8 May 2020 15:22:54 +0200 Subject: Avoid duplicate merge in tc error model fit This increases the performance in the complete test suite by about 20 secs from 120 to around 100 secs. I tried improving merge speed by using data.table on another branch, but this did not give a noticeable performance gain. --- R/mkinfit.R | 17 +++++++---------- test.log | 22 +++++++++++----------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/R/mkinfit.R b/R/mkinfit.R index f5e7e493..61593ce5 100644 --- a/R/mkinfit.R +++ b/R/mkinfit.R @@ -594,23 +594,20 @@ mkinfit <- function(mkinmod, observed, out_long <- mkin_wide_to_long(out, time = "time") + cost_data <- merge(observed[c("name", "time", "value")], out_long, + by = c("name", "time"), suffixes = c(".observed", ".predicted")) + if (err_mod == "const") { - observed$std <- if (OLS) NA else cost_errparms["sigma"] + cost_data$std <- if (OLS) NA else cost_errparms["sigma"] } if (err_mod == "obs") { - std_names <- paste0("sigma_", observed$name) - observed$std <- cost_errparms[std_names] + std_names <- paste0("sigma_", cost_data$name) + cost_data$std <- cost_errparms[std_names] } if (err_mod == "tc") { - tmp <- merge(observed, out_long, by = c("time", "name")) - tmp$name <- ordered(tmp$name, levels = obs_vars) - tmp <- tmp[order(tmp$name, tmp$time), ] - observed$std <- sqrt(cost_errparms["sigma_low"]^2 + tmp$value.y^2 * cost_errparms["rsd_high"]^2) + cost_data$std <- sqrt(cost_errparms["sigma_low"]^2 + cost_data$value.predicted^2 * cost_errparms["rsd_high"]^2) } - cost_data <- merge(observed[c("name", "time", "value", "std")], out_long, - by = c("name", "time"), suffixes = c(".observed", ".predicted")) - if (OLS) { # Cost is the sum of squared residuals cost <- with(cost_data, sum((value.observed - value.predicted)^2)) diff --git a/test.log b/test.log index db558407..4f8bb36b 100644 --- a/test.log +++ b/test.log @@ -2,32 +2,32 @@ Loading mkin Testing mkin ✔ | OK F W S | Context ✔ | 2 | Export dataset for reading into CAKE -✔ | 13 | Results for FOCUS D established in expertise for UBA (Ranke 2014) [3.2 s] +✔ | 13 | Results for FOCUS D established in expertise for UBA (Ranke 2014) [3.1 s] ✔ | 4 | Calculation of FOCUS chi2 error levels [1.9 s] -✔ | 7 | Fitting the SFORB model [9.9 s] +✔ | 7 | Fitting the SFORB model [9.8 s] ✔ | 5 | Calculation of Akaike weights -✔ | 10 | Confidence intervals and p-values [8.5 s] -✔ | 14 | Error model fitting [34.0 s] +✔ | 10 | Confidence intervals and p-values [8.4 s] +✔ | 14 | Error model fitting [22.0 s] ✔ | 6 | Test fitting the decline of metabolites from their maximum [0.7 s] ✔ | 1 | Fitting the logistic model [0.8 s] ✔ | 1 | Test dataset class mkinds used in gmkin ✔ | 12 | Special cases of mkinfit calls [2.1 s] ✔ | 8 | mkinmod model generation and printing [0.2 s] ✔ | 3 | Model predictions with mkinpredict [0.4 s] -✔ | 16 | Evaluations according to 2015 NAFTA guidance [3.9 s] -✔ | 9 | Nonlinear mixed-effects models [11.8 s] +✔ | 16 | Evaluations according to 2015 NAFTA guidance [3.8 s] +✔ | 9 | Nonlinear mixed-effects models [11.9 s] ✔ | 4 | Calculation of maximum time weighted average concentrations (TWAs) [2.4 s] ✔ | 3 | Summary -✔ | 14 | Plotting [4.5 s] +✔ | 14 | Plotting [4.1 s] ✔ | 4 | AIC calculation ✔ | 4 | Residuals extracted from mkinfit models -✔ | 2 | Complex test case from Schaefer et al. (2007) Piacenza paper [4.0 s] +✔ | 2 | Complex test case from Schaefer et al. (2007) Piacenza paper [3.9 s] ✔ | 1 | Summaries of old mkinfit objects -✔ | 4 | Results for synthetic data established in expertise for UBA (Ranke 2014) [6.0 s] -✔ | 9 | Hypothesis tests [29.9 s] +✔ | 4 | Results for synthetic data established in expertise for UBA (Ranke 2014) [6.1 s] +✔ | 9 | Hypothesis tests [21.6 s] ══ Results ═════════════════════════════════════════════════════════════════════ -Duration: 124.5 s +Duration: 103.4 s OK: 156 Failed: 0 -- cgit v1.2.1