Avoid duplicate merge in tc error model fit

This reduces the run time of the complete test suite by about 20 seconds, from roughly 120 to around 100 seconds. I tried improving merge speed by using data.table on another branch, but this did not give a noticeable performance gain.
author: Johannes Ranke <jranke@uni-bremen.de> 2020-05-08 15:22:54 +0200
committer: Johannes Ranke <jranke@uni-bremen.de> 2020-05-08 15:22:54 +0200
commit: 636dade692b8eee012004a2740616385333efc48 (patch)
tree: 4b5d54da6f0ac77526b39ad092dfdaa7f64362bc
parent: 2c313bdb12c5fcae8272600021fcedbc99425130 (diff)
2 files changed, 18 insertions, 21 deletions
diff --git a/R/mkinfit.R b/R/mkinfit.R
index f5e7e493..61593ce5 100644
--- a/R/mkinfit.R
+++ b/R/mkinfit.R
@@ -594,23 +594,20 @@ mkinfit <- function(mkinmod, observed,
 
     out_long <- mkin_wide_to_long(out, time = "time")
 
+    cost_data <- merge(observed[c("name", "time", "value")], out_long,
+                         by = c("name", "time"), suffixes = c(".observed", ".predicted"))
+
     if (err_mod == "const") {
-      observed$std <- if (OLS) NA else cost_errparms["sigma"]
+      cost_data$std <- if (OLS) NA else cost_errparms["sigma"]
     }
     if (err_mod == "obs") {
-      std_names <- paste0("sigma_", observed$name)
-      observed$std <- cost_errparms[std_names]
+      std_names <- paste0("sigma_", cost_data$name)
+      cost_data$std <- cost_errparms[std_names]
     }
     if (err_mod == "tc") {
-      tmp <- merge(observed, out_long, by = c("time", "name"))
-      tmp$name <- ordered(tmp$name, levels = obs_vars)
-      tmp <- tmp[order(tmp$name, tmp$time), ]
-      observed$std <- sqrt(cost_errparms["sigma_low"]^2 + tmp$value.y^2 * cost_errparms["rsd_high"]^2)
+      cost_data$std <- sqrt(cost_errparms["sigma_low"]^2 + cost_data$value.predicted^2 * cost_errparms["rsd_high"]^2)
     }
 
-    cost_data <- merge(observed[c("name", "time", "value", "std")], out_long,
-                         by = c("name", "time"), suffixes = c(".observed", ".predicted"))
-
     if (OLS) {
       # Cost is the sum of squared residuals
       cost <- with(cost_data, sum((value.observed - value.predicted)^2))
diff --git a/test.log b/test.log
index db558407..4f8bb36b 100644
--- a/test.log
+++ b/test.log
@@ -2,32 +2,32 @@ Loading mkin
 Testing mkin
 ✔ |  OK F W S | Context
 ✔ |   2       | Export dataset for reading into CAKE
-✔ |  13       | Results for FOCUS D established in expertise for UBA (Ranke 2014) [3.2 s]
+✔ |  13       | Results for FOCUS D established in expertise for UBA (Ranke 2014) [3.1 s]
 ✔ |   4       | Calculation of FOCUS chi2 error levels [1.9 s]
-✔ |   7       | Fitting the SFORB model [9.9 s]
+✔ |   7       | Fitting the SFORB model [9.8 s]
 ✔ |   5       | Calculation of Akaike weights
-✔ |  10       | Confidence intervals and p-values [8.5 s]
-✔ |  14       | Error model fitting [34.0 s]
+✔ |  10       | Confidence intervals and p-values [8.4 s]
+✔ |  14       | Error model fitting [22.0 s]
 ✔ |   6       | Test fitting the decline of metabolites from their maximum [0.7 s]
 ✔ |   1       | Fitting the logistic model [0.8 s]
 ✔ |   1       | Test dataset class mkinds used in gmkin
 ✔ |  12       | Special cases of mkinfit calls [2.1 s]
 ✔ |   8       | mkinmod model generation and printing [0.2 s]
 ✔ |   3       | Model predictions with mkinpredict [0.4 s]
-✔ |  16       | Evaluations according to 2015 NAFTA guidance [3.9 s]
-✔ |   9       | Nonlinear mixed-effects models [11.8 s]
+✔ |  16       | Evaluations according to 2015 NAFTA guidance [3.8 s]
+✔ |   9       | Nonlinear mixed-effects models [11.9 s]
 ✔ |   4       | Calculation of maximum time weighted average concentrations (TWAs) [2.4 s]
 ✔ |   3       | Summary
-✔ |  14       | Plotting [4.5 s]
+✔ |  14       | Plotting [4.1 s]
 ✔ |   4       | AIC calculation
 ✔ |   4       | Residuals extracted from mkinfit models
-✔ |   2       | Complex test case from Schaefer et al. (2007) Piacenza paper [4.0 s]
+✔ |   2       | Complex test case from Schaefer et al. (2007) Piacenza paper [3.9 s]
 ✔ |   1       | Summaries of old mkinfit objects
-✔ |   4       | Results for synthetic data established in expertise for UBA (Ranke 2014) [6.0 s]
-✔ |   9       | Hypothesis tests [29.9 s]
+✔ |   4       | Results for synthetic data established in expertise for UBA (Ranke 2014) [6.1 s]
+✔ |   9       | Hypothesis tests [21.6 s]
 
 ══ Results ═════════════════════════════════════════════════════════════════════
-Duration: 124.5 s
+Duration: 103.4 s
 
 OK:       156
 Failed:   0
author	Johannes Ranke <jranke@uni-bremen.de>	2020-05-08 15:22:54 +0200
committer	Johannes Ranke <jranke@uni-bremen.de>	2020-05-08 15:22:54 +0200
commit	636dade692b8eee012004a2740616385333efc48 (patch)
tree	4b5d54da6f0ac77526b39ad092dfdaa7f64362bc
parent	2c313bdb12c5fcae8272600021fcedbc99425130 (diff)