From 636dade692b8eee012004a2740616385333efc48 Mon Sep 17 00:00:00 2001
From: Johannes Ranke <jranke@uni-bremen.de>
Date: Fri, 8 May 2020 15:22:54 +0200
Subject: Avoid duplicate merge in tc error model fit

This reduces the run time of the complete test suite by about
20 seconds, from around 120 to around 100 seconds.

I tried improving merge speed by using data.table on another
branch, but this did not give a noticeable performance gain.
---
 R/mkinfit.R | 17 +++++++----------
 test.log    | 22 +++++++++++-----------
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/R/mkinfit.R b/R/mkinfit.R
index f5e7e493..61593ce5 100644
--- a/R/mkinfit.R
+++ b/R/mkinfit.R
@@ -594,23 +594,20 @@ mkinfit <- function(mkinmod, observed,
 
     out_long <- mkin_wide_to_long(out, time = "time")
 
+    cost_data <- merge(observed[c("name", "time", "value")], out_long,
+                         by = c("name", "time"), suffixes = c(".observed", ".predicted"))
+
     if (err_mod == "const") {
-      observed$std <- if (OLS) NA else cost_errparms["sigma"]
+      cost_data$std <- if (OLS) NA else cost_errparms["sigma"]
     }
     if (err_mod == "obs") {
-      std_names <- paste0("sigma_", observed$name)
-      observed$std <- cost_errparms[std_names]
+      std_names <- paste0("sigma_", cost_data$name)
+      cost_data$std <- cost_errparms[std_names]
     }
     if (err_mod == "tc") {
-      tmp <- merge(observed, out_long, by = c("time", "name"))
-      tmp$name <- ordered(tmp$name, levels = obs_vars)
-      tmp <- tmp[order(tmp$name, tmp$time), ]
-      observed$std <- sqrt(cost_errparms["sigma_low"]^2 + tmp$value.y^2 * cost_errparms["rsd_high"]^2)
+      cost_data$std <- sqrt(cost_errparms["sigma_low"]^2 + cost_data$value.predicted^2 * cost_errparms["rsd_high"]^2)
     }
 
-    cost_data <- merge(observed[c("name", "time", "value", "std")], out_long,
-                         by = c("name", "time"), suffixes = c(".observed", ".predicted"))
-
     if (OLS) {
       # Cost is the sum of squared residuals
       cost <- with(cost_data, sum((value.observed - value.predicted)^2))
diff --git a/test.log b/test.log
index db558407..4f8bb36b 100644
--- a/test.log
+++ b/test.log
@@ -2,32 +2,32 @@ Loading mkin
 Testing mkin
 ✔ |  OK F W S | Context
 ✔ |   2       | Export dataset for reading into CAKE
-✔ |  13       | Results for FOCUS D established in expertise for UBA (Ranke 2014) [3.2 s]
+✔ |  13       | Results for FOCUS D established in expertise for UBA (Ranke 2014) [3.1 s]
 ✔ |   4       | Calculation of FOCUS chi2 error levels [1.9 s]
-✔ |   7       | Fitting the SFORB model [9.9 s]
+✔ |   7       | Fitting the SFORB model [9.8 s]
 ✔ |   5       | Calculation of Akaike weights
-✔ |  10       | Confidence intervals and p-values [8.5 s]
-✔ |  14       | Error model fitting [34.0 s]
+✔ |  10       | Confidence intervals and p-values [8.4 s]
+✔ |  14       | Error model fitting [22.0 s]
 ✔ |   6       | Test fitting the decline of metabolites from their maximum [0.7 s]
 ✔ |   1       | Fitting the logistic model [0.8 s]
 ✔ |   1       | Test dataset class mkinds used in gmkin
 ✔ |  12       | Special cases of mkinfit calls [2.1 s]
 ✔ |   8       | mkinmod model generation and printing [0.2 s]
 ✔ |   3       | Model predictions with mkinpredict [0.4 s]
-✔ |  16       | Evaluations according to 2015 NAFTA guidance [3.9 s]
-✔ |   9       | Nonlinear mixed-effects models [11.8 s]
+✔ |  16       | Evaluations according to 2015 NAFTA guidance [3.8 s]
+✔ |   9       | Nonlinear mixed-effects models [11.9 s]
 ✔ |   4       | Calculation of maximum time weighted average concentrations (TWAs) [2.4 s]
 ✔ |   3       | Summary
-✔ |  14       | Plotting [4.5 s]
+✔ |  14       | Plotting [4.1 s]
 ✔ |   4       | AIC calculation
 ✔ |   4       | Residuals extracted from mkinfit models
-✔ |   2       | Complex test case from Schaefer et al. (2007) Piacenza paper [4.0 s]
+✔ |   2       | Complex test case from Schaefer et al. (2007) Piacenza paper [3.9 s]
 ✔ |   1       | Summaries of old mkinfit objects
-✔ |   4       | Results for synthetic data established in expertise for UBA (Ranke 2014) [6.0 s]
-✔ |   9       | Hypothesis tests [29.9 s]
+✔ |   4       | Results for synthetic data established in expertise for UBA (Ranke 2014) [6.1 s]
+✔ |   9       | Hypothesis tests [21.6 s]
 
 ══ Results ═════════════════════════════════════════════════════════════════════
-Duration: 124.5 s
+Duration: 103.4 s
 
 OK:       156
 Failed:   0
-- 
cgit v1.2.3