From 7624a2b8398b4ad665a3b0b622488e1893a5ee7c Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Mon, 21 Oct 2019 12:11:34 +0200 Subject: Refactor mkinfit, infrastructure work mkinfit objects now include an ll() function to calculate the log-likelihood. Part of the code was refactored, hopefully making it easier to read and maintain. IRLS is currently the default algorithm for the error model "obs", for no particular reason. This may be subject to change when I get around to investigate. Slow tests are now in a separate subdirectory and will probably only be run by my own Makefile target. Formatting of test logs is improved. Roundtripping error model parameters works with a precision of 10% when we use lots of replicates in the synthetic data (see slow tests). This is not new in this commit, but as I think it is reasonable this closes #7. --- tests/testthat/DFOP_FOCUS_C_messages.txt | 2 +- tests/testthat/FOCUS_2006_D.csf | 2 +- tests/testthat/slow/test_parent_only.R | 218 +++++++++++++++++++++ .../slow/test_roundtrip_error_parameters.R | 141 +++++++++++++ tests/testthat/summary_DFOP_FOCUS_C.txt | 12 +- tests/testthat/test_confidence.R | 51 +++++ tests/testthat/test_error_models.R | 138 +------------ tests/testthat/test_parent_only.R | 218 --------------------- 8 files changed, 427 insertions(+), 355 deletions(-) create mode 100644 tests/testthat/slow/test_parent_only.R create mode 100644 tests/testthat/slow/test_roundtrip_error_parameters.R create mode 100644 tests/testthat/test_confidence.R delete mode 100644 tests/testthat/test_parent_only.R (limited to 'tests/testthat') diff --git a/tests/testthat/DFOP_FOCUS_C_messages.txt b/tests/testthat/DFOP_FOCUS_C_messages.txt index d3d7688b..78438d06 100644 --- a/tests/testthat/DFOP_FOCUS_C_messages.txt +++ b/tests/testthat/DFOP_FOCUS_C_messages.txt @@ -1,4 +1,4 @@ -parent_0 log_k1 log_k2 g_ilr sigma +parent_0 log_k1 log_k2 g_ilr 85.1 -2.302585 -4.60517 0 Sum of squared residuals at call 1: 7391.39 85.1 -2.302585 -4.60517 0 diff --git a/tests/testthat/FOCUS_2006_D.csf b/tests/testthat/FOCUS_2006_D.csf index f9233770..171abbb0 100644 --- a/tests/testthat/FOCUS_2006_D.csf +++ b/tests/testthat/FOCUS_2006_D.csf @@ -5,7 +5,7 @@ Description: MeasurementUnits: % AR TimeUnits: days Comments: Created using mkin::CAKE_export -Date: 2019-07-08 +Date: 2019-10-21 Optimiser: IRLS [Data] diff --git a/tests/testthat/slow/test_parent_only.R b/tests/testthat/slow/test_parent_only.R new file mode 100644 index 00000000..7521e145 --- /dev/null +++ b/tests/testthat/slow/test_parent_only.R @@ -0,0 +1,218 @@ +# Copyright (C) 2015,2018 Johannes Ranke +# Contact: jranke@uni-bremen.de + +# This file is part of the R package mkin + +# mkin is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. + +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. + +# You should have received a copy of the GNU General Public License along with +# this program. If not, see + +context("Fitting of parent only models") + +calc_dev.percent <- function(fitlist, reference, endpoints = TRUE, round_results = NULL) { + dev.percent <- list() + for (i in 1:length(fitlist)) { + fit <- fitlist[[i]] + if (endpoints) { + results <- c(fit$bparms.optim, + endpoints(fit)$distimes$DT50, + endpoints(fit)$distimes$DT90) + } else { + results <- fit$bparms.optim + } + if (!missing(round_results)) results <- round(results, round_results) + dev.percent[[i]] <- abs(100 * ((reference - results)/reference)) + } + return(dev.percent) +} + +SFO <- mkinmod(parent = list(type = "SFO")) +FOMC <- mkinmod(parent = list(type = "FOMC")) +DFOP <- mkinmod(parent = list(type = "DFOP")) +HS <- mkinmod(parent = list(type = "HS")) +SFORB <- mkinmod(parent = list(type = "SFORB")) + +test_that("Fits for FOCUS A deviate less than 0.1% from median of values from FOCUS report", { + fit.A.SFO <- list(mkinfit("SFO", FOCUS_2006_A, quiet = TRUE)) + + median.A.SFO <- as.numeric(lapply(subset(FOCUS_2006_SFO_ref_A_to_F, + dataset == "A", + c(M0, k, DT50, DT90)), "median")) + + dev.percent.A.SFO <- calc_dev.percent(fit.A.SFO, median.A.SFO) + expect_equivalent(dev.percent.A.SFO[[1]] < 0.1, rep(TRUE, 4)) + + # Fitting FOCUS A with FOMC is possible, but the correlation between + # alpha and beta, when obtained, is 1.0000, and the fit does not + # always converge using the Port algorithm (platform dependent), so + # we need to suppress a potential warning + suppressWarnings(fit.A.FOMC <- try(list(mkinfit("FOMC", FOCUS_2006_A, quiet = TRUE)))) + + if (!inherits(fit.A.FOMC, "try-error")) { + + median.A.FOMC <- as.numeric(lapply(subset(FOCUS_2006_FOMC_ref_A_to_F, + dataset == "A", + c(M0, alpha, beta, DT50, DT90)), "median")) + + dev.percent.A.FOMC <- calc_dev.percent(fit.A.FOMC, median.A.FOMC) + # alpha and are beta ill-determined, do not compare those + expect_equivalent(dev.percent.A.FOMC[[1]][c(1, 4, 5)] < 0.1, rep(TRUE, 3)) + } + + fit.A.DFOP <- list(mkinfit("DFOP", FOCUS_2006_A, quiet = TRUE)) + + median.A.DFOP <- as.numeric(lapply(subset(FOCUS_2006_DFOP_ref_A_to_B, + dataset == "A", + c(M0, k1, k2, f, DT50, DT90)), "median")) + + dev.percent.A.DFOP <- calc_dev.percent(fit.A.DFOP, median.A.DFOP) + #expect_equivalent(dev.percent.A.DFOP[[1]] < 0.1, rep(TRUE, 6)) # g/f is ill-determined + expect_equivalent(dev.percent.A.DFOP[[1]][c(1, 2, 3, 5, 6)] < 0.1, rep(TRUE, 5)) + + fit.A.HS <- list(mkinfit("HS", FOCUS_2006_A, quiet = TRUE)) + + median.A.HS <- as.numeric(lapply(subset(FOCUS_2006_HS_ref_A_to_F, + dataset == "A", + c(M0, k1, k2, tb, DT50, DT90)), "median")) + + dev.percent.A.HS <- calc_dev.percent(fit.A.HS, median.A.HS) + expect_equivalent(dev.percent.A.HS[[1]] < 0.1, rep(TRUE, 6)) +}) + +test_that("Fits for FOCUS B deviate less than 0.1% from median of values from FOCUS report", { + skip_on_cran() + fit.B.SFO <- list(mkinfit("SFO", FOCUS_2006_B, quiet = TRUE)) + + median.B.SFO <- as.numeric(lapply(subset(FOCUS_2006_SFO_ref_A_to_F, + dataset == "B", + c(M0, k, DT50, DT90)), "median")) + + dev.percent.B.SFO <- calc_dev.percent(fit.B.SFO, median.B.SFO) + expect_equivalent(dev.percent.B.SFO[[1]] < 0.1, rep(TRUE, 4)) + + fit.B.FOMC <- list(mkinfit("FOMC", FOCUS_2006_B, quiet = TRUE)) + + median.B.FOMC <- as.numeric(lapply(subset(FOCUS_2006_FOMC_ref_A_to_F, + dataset == "B", + c(M0, alpha, beta, DT50, DT90)), "median")) + + dev.percent.B.FOMC <- calc_dev.percent(fit.B.FOMC, median.B.FOMC) + expect_equivalent(dev.percent.B.FOMC[[1]] < 0.1, rep(TRUE, 5)) + + fit.B.DFOP <- list(mkinfit("DFOP", FOCUS_2006_B, quiet = TRUE)) + + median.B.DFOP <- as.numeric(lapply(subset(FOCUS_2006_DFOP_ref_A_to_B, + dataset == "B", + c(M0, k1, k2, f, DT50, DT90)), "median")) + + dev.percent.B.DFOP <- calc_dev.percent(fit.B.DFOP, median.B.DFOP) + #expect_equivalent(dev.percent.B.DFOP[[1]] < 0.1, rep(TRUE, 6)) # g/f is ill-determined + expect_equivalent(dev.percent.B.DFOP[[1]][c(1, 2, 3, 5, 6)] < 0.1, rep(TRUE, 5)) + + fit.B.HS <- list(mkinfit("HS", FOCUS_2006_B, quiet = TRUE)) + + median.B.HS <- as.numeric(lapply(subset(FOCUS_2006_HS_ref_A_to_F, + dataset == "B", + c(M0, k1, k2, tb, DT50, DT90)), + "median", na.rm = TRUE)) + + dev.percent.B.HS <- calc_dev.percent(fit.B.HS, median.B.HS) + expect_equivalent(dev.percent.B.HS[[1]] < 0.1, rep(TRUE, 6)) + + fit.B.SFORB <- list(mkinfit(SFORB, FOCUS_2006_B, quiet=TRUE)) + dev.percent.B.SFORB <- calc_dev.percent(fit.B.SFORB, median.B.DFOP) + expect_equivalent(dev.percent.B.SFORB[[1]][c(1, 5, 6)] < 0.1, rep(TRUE, 3)) +}) + +test_that("Fits for FOCUS C deviate less than 0.1% from median of values from FOCUS report", { + fit.C.SFO <- list(mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)) + + median.C.SFO <- as.numeric(lapply(subset(FOCUS_2006_SFO_ref_A_to_F, + dataset == "C", + c(M0, k, DT50, DT90)), "median")) + + dev.percent.C.SFO <- calc_dev.percent(fit.C.SFO, median.C.SFO) + expect_equivalent(dev.percent.C.SFO[[1]] < 0.1, rep(TRUE, 4)) + + fit.C.FOMC <- list(mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE)) + + median.C.FOMC <- as.numeric(lapply(subset(FOCUS_2006_FOMC_ref_A_to_F, + dataset == "C", + c(M0, alpha, beta, DT50, DT90)), "median")) + + dev.percent.C.FOMC <- calc_dev.percent(fit.C.FOMC, median.C.FOMC, + round_results = 2) # Not enough precision in FOCUS results + expect_equivalent(dev.percent.C.FOMC[[1]] < 0.1, rep(TRUE, 5)) + + fit.C.HS <- list(mkinfit("HS", FOCUS_2006_C, quiet = TRUE)) + + median.C.HS <- as.numeric(lapply(subset(FOCUS_2006_HS_ref_A_to_F, + dataset == "C", + c(M0, k1, k2, tb, DT50, DT90)), "median")) + + dev.percent.C.HS <- calc_dev.percent(fit.C.HS, median.C.HS, round_results = c(2, 4, 6, 2, 2)) + # Not enouth precision in k2 available + expect_equivalent(dev.percent.C.HS[[1]] < c(0.1, 0.1, 0.3, 0.1, 0.1, 0.1), rep(TRUE, 6)) +}) + +test_that("SFO fits give approximately (0.001%) equal results with different solution methods", { + skip_on_cran() + fit.A.SFO.default <- mkinfit("SFO", FOCUS_2006_A, quiet = TRUE)$bparms.optim + + fits.A.SFO <- list() + fits.A.SFO[[1]] <- mkinfit(SFO, FOCUS_2006_A, quiet = TRUE) + fits.A.SFO[[2]] <- mkinfit(SFO, FOCUS_2006_A, quiet = TRUE, solution_type = "eigen") + fits.A.SFO[[3]] <- mkinfit(SFO, FOCUS_2006_A, quiet = TRUE, solution_type = "deSolve") + + dev.percent <- calc_dev.percent(fits.A.SFO, fit.A.SFO.default, endpoints = FALSE) + expect_equivalent(dev.percent[[1]] < 0.001, rep(TRUE, 2)) + expect_equivalent(dev.percent[[2]] < 0.001, rep(TRUE, 2)) + expect_equivalent(dev.percent[[3]] < 0.001, rep(TRUE, 2)) +}) + +test_that("FOMC fits give approximately (0.001%) equal results with different solution methods", { + skip_on_cran() + fit.C.FOMC.default <- mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE)$bparms.optim + + fits.C.FOMC <- list() + fits.C.FOMC[[1]] <- mkinfit(FOMC, FOCUS_2006_C, quiet = TRUE) + fits.C.FOMC[[2]] <- mkinfit(FOMC, FOCUS_2006_C, quiet = TRUE, solution_type = "deSolve") + + dev.percent <- calc_dev.percent(fits.C.FOMC, fit.C.FOMC.default, endpoints = FALSE) + expect_equivalent(dev.percent[[1]] < 0.001, rep(TRUE, 3)) + expect_equivalent(dev.percent[[2]] < 0.001, rep(TRUE, 3)) +}) + +test_that("DFOP fits give approximately (0.001%) equal results with different solution methods", { + skip_on_cran() + fit.C.DFOP.default <- mkinfit("DFOP", FOCUS_2006_C, quiet = TRUE)$bparms.optim + + fits.C.DFOP <- list() + fits.C.DFOP[[1]] <- mkinfit(DFOP, FOCUS_2006_C, quiet = TRUE) + fits.C.DFOP[[2]] <- mkinfit(DFOP, FOCUS_2006_C, quiet = TRUE, solution_type = "deSolve") + + dev.percent <- calc_dev.percent(fits.C.DFOP, fit.C.DFOP.default, endpoints = FALSE) + expect_equivalent(dev.percent[[1]] < 0.001, rep(TRUE, 4)) + expect_equivalent(dev.percent[[2]] < 0.001, rep(TRUE, 4)) +}) + +test_that("SFORB fits give approximately (0.002%) equal results with different solution methods", { + skip_on_cran() + fit.B.SFORB.default <- mkinfit(SFORB, FOCUS_2006_B, quiet=TRUE)$bparms.optim + + fits.B.SFORB <- list() + fits.B.SFORB[[1]] <- mkinfit(SFORB, FOCUS_2006_B, quiet=TRUE, solution_type = "eigen") + fits.B.SFORB[[2]] <- mkinfit(SFORB, FOCUS_2006_B, quiet=TRUE, solution_type = "deSolve") + dev.percent <- calc_dev.percent(fits.B.SFORB, fit.B.SFORB.default, endpoints = FALSE) + expect_equivalent(dev.percent[[1]] < 0.001, rep(TRUE, 4)) + expect_equivalent(dev.percent[[2]] < 0.002, rep(TRUE, 4)) +}) diff --git a/tests/testthat/slow/test_roundtrip_error_parameters.R b/tests/testthat/slow/test_roundtrip_error_parameters.R new file mode 100644 index 00000000..97510563 --- /dev/null +++ b/tests/testthat/slow/test_roundtrip_error_parameters.R @@ -0,0 +1,141 @@ +test_that("Reweighting method 'tc' produces reasonable variance estimates", { + + # Check if we can approximately obtain the parameters and the error model + # components that were used in the data generation + + # Parent only + DFOP <- mkinmod(parent = mkinsub("DFOP")) + sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) + parms_DFOP <- c(k1 = 0.2, k2 = 0.02, g = 0.5) + parms_DFOP_optim <- c(parent_0 = 100, parms_DFOP) + + d_DFOP <- mkinpredict(DFOP, + parms_DFOP, c(parent = 100), + sampling_times) + d_2_10 <- add_err(d_DFOP, + sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), + n = 10, reps = 2, digits = 5, LOD = -Inf, seed = 123456) + d_100_1 <- add_err(d_DFOP, + sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), + n = 1, reps = 100, digits = 5, LOD = -Inf, seed = 123456) + + # Per default (on my box where I set NOT_CRAN) use all cores minus one + if (identical(Sys.getenv("NOT_CRAN"), "true")) { + n_cores <- parallel::detectCores() - 1 + } else { + n_cores <- 1 + } + + # We are only allowed one core on travis, but they also set NOT_CRAN=true + if (Sys.getenv("TRAVIS") != "") n_cores = 1 + + # On Windows we would need to make a cluster first + if (Sys.info()["sysname"] == "Windows") n_cores = 1 + + # Unweighted fits + f_2_10 <- mmkin("DFOP", d_2_10, error_model = "const", quiet = TRUE, + cores = n_cores) + parms_2_10 <- apply(sapply(f_2_10, function(x) x$bparms.optim), 1, mean) + parm_errors_2_10 <- (parms_2_10 - parms_DFOP_optim) / parms_DFOP_optim + expect_true(all(abs(parm_errors_2_10) < 0.12)) + + f_2_10_tc <- mmkin("DFOP", d_2_10, error_model = "tc", quiet = TRUE, + cores = n_cores) + parms_2_10_tc <- apply(sapply(f_2_10_tc, function(x) x$bparms.optim), 1, mean) + parm_errors_2_10_tc <- (parms_2_10_tc - parms_DFOP_optim) / parms_DFOP_optim + expect_true(all(abs(parm_errors_2_10_tc) < 0.05)) + + tcf_2_10_tc <- apply(sapply(f_2_10_tc, function(x) x$errparms), 1, mean, na.rm = TRUE) + + tcf_2_10_error_model_errors <- (tcf_2_10_tc - c(0.5, 0.07)) / c(0.5, 0.07) + expect_true(all(abs(tcf_2_10_error_model_errors) < 0.2)) + + # When we have 100 replicates in the synthetic data, we can roundtrip + # the parameters with < 2% precision + f_tc_100_1 <- mkinfit(DFOP, d_100_1[[1]], error_model = "tc", quiet = TRUE) + parm_errors_100_1 <- (f_tc_100_1$bparms.optim - parms_DFOP_optim) / parms_DFOP_optim + expect_true(all(abs(parm_errors_100_1) < 0.02)) + + tcf_100_1_error_model_errors <- (f_tc_100_1$errparms - c(0.5, 0.07)) / + c(0.5, 0.07) + # We also get a precision of < 2% for the error model components + expect_true(all(abs(tcf_100_1_error_model_errors) < 0.02)) + + # Parent and two metabolites + m_synth_DFOP_lin <- mkinmod(parent = list(type = "DFOP", to = "M1"), + M1 = list(type = "SFO", to = "M2"), + M2 = list(type = "SFO"), use_of_ff = "max", + quiet = TRUE) + sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) + parms_DFOP_lin <- c(k1 = 0.2, k2 = 0.02, g = 0.5, + f_parent_to_M1 = 0.5, k_M1 = 0.3, + f_M1_to_M2 = 0.7, k_M2 = 0.02) + d_synth_DFOP_lin <- mkinpredict(m_synth_DFOP_lin, + parms_DFOP_lin, + c(parent = 100, M1 = 0, M2 = 0), + sampling_times) + parms_DFOP_lin_optim = c(parent_0 = 100, parms_DFOP_lin) + + d_met_2_15 <- add_err(d_synth_DFOP_lin, + sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), + n = 15, reps = 100, digits = 5, LOD = 0.01, seed = 123456) + + # For a single fit, we get a relative error of less than 5% in the error + # model components + f_met_2_tc_e4 <- mkinfit(m_synth_DFOP_lin, d_met_2_15[[1]], quiet = TRUE, + error_model = "tc", error_model_algorithm = "direct") + parm_errors_met_2_tc_e4 <- (f_met_2_tc_e4$errparms - c(0.5, 0.07)) / c(0.5, 0.07) + expect_true(all(abs(parm_errors_met_2_tc_e4) < 0.05)) + + # Doing more takes a lot of computing power + skip_on_travis() + skip_on_cran() + f_met_2_15_tc_e4 <- mmkin(list(m_synth_DFOP_lin), d_met_2_15, quiet = TRUE, + error_model = "tc", cores = n_cores) + + parms_met_2_15_tc_e4 <- apply(sapply(f_met_2_15_tc_e4, function(x) x$bparms.optim), 1, mean) + parm_errors_met_2_15_tc_e4 <- (parms_met_2_15_tc_e4[names(parms_DFOP_lin_optim)] - + parms_DFOP_lin_optim) / parms_DFOP_lin_optim + expect_true(all(abs(parm_errors_met_2_15_tc_e4) < 0.015)) + + tcf_met_2_15_tc <- apply(sapply(f_met_2_15_tc_e4, function(x) x$errparms), 1, mean, na.rm = TRUE) + + tcf_met_2_15_tc_error_model_errors <- (tcf_met_2_15_tc - c(0.5, 0.07)) / + c(0.5, 0.07) + + # Here we get a precision < 10% for retrieving the original error model components + # from 15 datasets + expect_true(all(abs(tcf_met_2_15_tc_error_model_errors) < 0.10)) +}) + +test_that("The different error model fitting methods work for parent fits", { + skip_on_cran() + + f_9_OLS <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, + quiet = TRUE) + expect_equivalent(round(AIC(f_9_OLS), 2), 137.43) + + f_9_direct <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, + error_model = "tc", error_model_algorithm = "direct", quiet = TRUE) + expect_equivalent(round(AIC(f_9_direct), 2), 134.94) + + f_9_twostep <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, + error_model = "tc", error_model_algorithm = "twostep", quiet = TRUE) + expect_equivalent(round(AIC(f_9_twostep), 2), 134.94) + + f_9_threestep <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, + error_model = "tc", error_model_algorithm = "threestep", quiet = TRUE) + expect_equivalent(round(AIC(f_9_threestep), 2), 139.43) + + f_9_fourstep <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, + error_model = "tc", error_model_algorithm = "fourstep", quiet = TRUE) + expect_equivalent(round(AIC(f_9_fourstep), 2), 139.43) + + f_9_IRLS <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, + error_model = "tc", error_model_algorithm = "IRLS", quiet = TRUE) + expect_equivalent(round(AIC(f_9_IRLS), 2), 139.43) + + f_9_d_3 <- mkinfit("SFO", experimental_data_for_UBA_2019[[9]]$data, + error_model = "tc", error_model_algorithm = "d_3", quiet = TRUE) + expect_equivalent(round(AIC(f_9_d_3), 2), 134.94) +}) diff --git a/tests/testthat/summary_DFOP_FOCUS_C.txt b/tests/testthat/summary_DFOP_FOCUS_C.txt index b1afeff6..90ce82e2 100644 --- a/tests/testthat/summary_DFOP_FOCUS_C.txt +++ b/tests/testthat/summary_DFOP_FOCUS_C.txt @@ -17,12 +17,11 @@ Error model: Constant variance Error model algorithm: OLS Starting values for parameters to be optimised: - value type -parent_0 85.100000 state -k1 0.100000 deparm -k2 0.010000 deparm -g 0.500000 deparm -sigma 0.696237 error + value type +parent_0 85.10 state +k1 0.10 deparm +k2 0.01 deparm +g 0.50 deparm Starting values for the transformed parameters actually optimised: value lower upper @@ -30,7 +29,6 @@ parent_0 85.100000 -Inf Inf log_k1 -2.302585 -Inf Inf log_k2 -4.605170 -Inf Inf g_ilr 0.000000 -Inf Inf -sigma 0.696237 0 Inf Fixed parameter values: None diff --git a/tests/testthat/test_confidence.R b/tests/testthat/test_confidence.R new file mode 100644 index 00000000..e5cc1954 --- /dev/null +++ b/tests/testthat/test_confidence.R @@ -0,0 +1,51 @@ +# Copyright (C) 2019 Johannes Ranke +# Contact: jranke@uni-bremen.de + +# This file is part of the R package mkin + +# mkin is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. + +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. + +# You should have received a copy of the GNU General Public License along with +# this program. If not, see + +context("Confidence intervals and p-values") + +m_synth_SFO_lin <- mkinmod( + parent = mkinsub("SFO", "M1"), + M1 = mkinsub("SFO", "M2"), + M2 = mkinsub("SFO"), + use_of_ff = "max", quiet = TRUE) + +SFO_lin_a <- synthetic_data_for_UBA_2014[[1]]$data + +test_that("Confidence intervals are stable", { + f_1_mkin_OLS <- mkinfit(m_synth_SFO_lin, SFO_lin_a, quiet = TRUE) + f_1_mkin_ML <- mkinfit(m_synth_SFO_lin, SFO_lin_a, quiet = TRUE, + error_model = "const", error_model_algorithm = "direct") + + bpar_1 <- summary(f_1_mkin_ML)$bpar[, c("Estimate", "Lower", "Upper")] + # The reference used here is mkin 0.9.48.1 + bpar_1_mkin_0.9 <- read.table(text = +"parent_0 102.0000 98.6000 106.0000 +k_parent 0.7390 0.6770 0.8070 +k_M1 0.2990 0.2560 0.3490 +k_M2 0.0202 0.0176 0.0233 +f_parent_to_M1 0.7690 0.6640 0.8480 +f_M1_to_M2 0.7230 0.6030 0.8180", +col.names = c("parameter", "estimate", "lower", "upper")) + + expect_equivalent(signif(bpar_1[1:6, "Estimate"], 3), bpar_1_mkin_0.9$estimate) + + # Relative difference of lower bound of the confidence interval is < 0.02 + expect_equivalent(bpar_1[1:6, "Lower"], bpar_1_mkin_0.9$lower, + scale = bpar_1_mkin_0.9$lower, tolerance = 0.02) + }) + diff --git a/tests/testthat/test_error_models.R b/tests/testthat/test_error_models.R index fbae6286..f4015e00 100644 --- a/tests/testthat/test_error_models.R +++ b/tests/testthat/test_error_models.R @@ -35,25 +35,18 @@ DFOP_par_c <- synthetic_data_for_UBA_2014[[12]]$data test_that("Error model 'const' works", { skip_on_cran() fit_const_1 <- mkinfit(m_synth_SFO_lin, SFO_lin_a, error_model = "const", quiet = TRUE) - bpar_1 <- summary(fit_const_1)$bpar[, c("Estimate", "Lower", "Upper")] + bpar_1 <- fit_const_1$bparms.optim # The reference used here is mkin 0.9.48.1 bpar_1_mkin_0.9 <- read.table(text = -"parent_0 102.0000 98.6000 106.0000 -k_parent 0.7390 0.6770 0.8070 -k_M1 0.2990 0.2560 0.3490 -k_M2 0.0202 0.0176 0.0233 -f_parent_to_M1 0.7690 0.6640 0.8480 -f_M1_to_M2 0.7230 0.6030 0.8180", -col.names = c("parameter", "estimate", "lower", "upper")) - - expect_equivalent(signif(bpar_1[1:6, "Estimate"], 3), bpar_1_mkin_0.9$estimate) - # Relative difference of lower bound of confidence is < 0.02 - rel_diff <- function(v1, v2) { - (v1 - v2)/v2 - } - expect_equivalent(rel_diff(bpar_1[1:6, "Lower"], - bpar_1_mkin_0.9$lower), - rep(0, 6), tolerance = 0.02) +"parent_0 102.0000 +k_parent 0.7390 +k_M1 0.2990 +k_M2 0.0202 +f_parent_to_M1 0.7690 +f_M1_to_M2 0.7230", +col.names = c("parameter", "estimate")) + + expect_equivalent(signif(bpar_1, 3), bpar_1_mkin_0.9$estimate) }) test_that("Error model 'obs' works", { @@ -70,117 +63,6 @@ test_that("Error model 'tc' works", { expect_equivalent(parms_3, c(102.1, 0.7393, 0.2992, 0.0202, 0.7687, 0.7229)) }) -test_that("Reweighting method 'tc' produces reasonable variance estimates", { - - # Check if we can approximately obtain the parameters and the error model - # components that were used in the data generation - - # Parent only - DFOP <- mkinmod(parent = mkinsub("DFOP")) - sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) - parms_DFOP <- c(k1 = 0.2, k2 = 0.02, g = 0.5) - parms_DFOP_optim <- c(parent_0 = 100, parms_DFOP) - - d_DFOP <- mkinpredict(DFOP, - parms_DFOP, c(parent = 100), - sampling_times) - d_2_10 <- add_err(d_DFOP, - sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), - n = 10, reps = 2, digits = 5, LOD = -Inf, seed = 123456) - d_100_1 <- add_err(d_DFOP, - sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), - n = 1, reps = 100, digits = 5, LOD = -Inf, seed = 123456) - - # Per default (on my box where I set NOT_CRAN) use all cores minus one - if (identical(Sys.getenv("NOT_CRAN"), "true")) { - n_cores <- parallel::detectCores() - 1 - } else { - n_cores <- 1 - } - - # We are only allowed one core on travis, but they also set NOT_CRAN=true - if (Sys.getenv("TRAVIS") != "") n_cores = 1 - - # On Windows we would need to make a cluster first - if (Sys.info()["sysname"] == "Windows") n_cores = 1 - - # Unweighted fits - f_2_10 <- mmkin("DFOP", d_2_10, error_model = "const", quiet = TRUE, - cores = n_cores) - parms_2_10 <- apply(sapply(f_2_10, function(x) x$bparms.optim), 1, mean) - parm_errors_2_10 <- (parms_2_10 - parms_DFOP_optim) / parms_DFOP_optim - expect_true(all(abs(parm_errors_2_10) < 0.12)) - - f_2_10_tc <- mmkin("DFOP", d_2_10, error_model = "tc", quiet = TRUE, - cores = n_cores) - parms_2_10_tc <- apply(sapply(f_2_10_tc, function(x) x$bparms.optim), 1, mean) - parm_errors_2_10_tc <- (parms_2_10_tc - parms_DFOP_optim) / parms_DFOP_optim - expect_true(all(abs(parm_errors_2_10_tc) < 0.05)) - - tcf_2_10_tc <- apply(sapply(f_2_10_tc, function(x) x$errparms), 1, mean, na.rm = TRUE) - - tcf_2_10_error_model_errors <- (tcf_2_10_tc - c(0.5, 0.07)) / c(0.5, 0.07) - expect_true(all(abs(tcf_2_10_error_model_errors) < 0.2)) - - # When we have 100 replicates in the synthetic data, we can roundtrip - # the parameters with < 2% precision - f_tc_100_1 <- mkinfit(DFOP, d_100_1[[1]], error_model = "tc", quiet = TRUE) - parm_errors_100_1 <- (f_tc_100_1$bparms.optim - parms_DFOP_optim) / parms_DFOP_optim - expect_true(all(abs(parm_errors_100_1) < 0.02)) - - tcf_100_1_error_model_errors <- (f_tc_100_1$errparms - c(0.5, 0.07)) / - c(0.5, 0.07) - # When maximising the likelihood directly (not using IRLS), we get - # a precision of < 2% for the error model componentes as well - expect_true(all(abs(tcf_100_1_error_model_errors) < 0.02)) - - # Parent and two metabolites - m_synth_DFOP_lin <- mkinmod(parent = list(type = "DFOP", to = "M1"), - M1 = list(type = "SFO", to = "M2"), - M2 = list(type = "SFO"), use_of_ff = "max", - quiet = TRUE) - sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120) - parms_DFOP_lin <- c(k1 = 0.2, k2 = 0.02, g = 0.5, - f_parent_to_M1 = 0.5, k_M1 = 0.3, - f_M1_to_M2 = 0.7, k_M2 = 0.02) - d_synth_DFOP_lin <- mkinpredict(m_synth_DFOP_lin, - parms_DFOP_lin, - c(parent = 100, M1 = 0, M2 = 0), - sampling_times) - parms_DFOP_lin_optim = c(parent_0 = 100, parms_DFOP_lin) - - d_met_2_15 <- add_err(d_synth_DFOP_lin, - sdfunc = function(x) sigma_twocomp(x, 0.5, 0.07), - n = 15, reps = 100, digits = 5, LOD = 0.01, seed = 123456) - - # For a single fit, we get a relative error of less than 10% in the error - # model components - f_met_2_tc_e4 <- mkinfit(m_synth_DFOP_lin, d_met_2_15[[1]], quiet = TRUE, - error_model = "tc", error_model_algorithm = "direct") - parm_errors_met_2_tc_e4 <- (f_met_2_tc_e4$errparms - c(0.5, 0.07)) / c(0.5, 0.07) - expect_true(all(abs(parm_errors_met_2_tc_e4) < 0.1)) - - # Doing more takes a lot of computing power - skip_on_travis() - skip_on_cran() - f_met_2_15_tc_e4 <- mmkin(list(m_synth_DFOP_lin), d_met_2_15, quiet = TRUE, - error_model = "tc", cores = n_cores) - - parms_met_2_15_tc_e4 <- apply(sapply(f_met_2_15_tc_e4, function(x) x$bparms.optim), 1, mean) - parm_errors_met_2_15_tc_e4 <- (parms_met_2_15_tc_e4[names(parms_DFOP_lin_optim)] - - parms_DFOP_lin_optim) / parms_DFOP_lin_optim - expect_true(all(abs(parm_errors_met_2_15_tc_e4) < 0.015)) - - tcf_met_2_15_tc <- apply(sapply(f_met_2_15_tc_e4, function(x) x$errparms), 1, mean, na.rm = TRUE) - - tcf_met_2_15_tc_error_model_errors <- (tcf_met_2_15_tc - c(0.5, 0.07)) / - c(0.5, 0.07) - - # Here we get a precision < 10% for retrieving the original error model components - # from 15 datasets - expect_true(all(abs(tcf_met_2_15_tc_error_model_errors) < 0.10)) -}) - test_that("The different error model fitting methods work for parent fits", { skip_on_cran() diff --git a/tests/testthat/test_parent_only.R b/tests/testthat/test_parent_only.R deleted file mode 100644 index 7521e145..00000000 --- a/tests/testthat/test_parent_only.R +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (C) 2015,2018 Johannes Ranke -# Contact: jranke@uni-bremen.de - -# This file is part of the R package mkin - -# mkin is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see - -context("Fitting of parent only models") - -calc_dev.percent <- function(fitlist, reference, endpoints = TRUE, round_results = NULL) { - dev.percent <- list() - for (i in 1:length(fitlist)) { - fit <- fitlist[[i]] - if (endpoints) { - results <- c(fit$bparms.optim, - endpoints(fit)$distimes$DT50, - endpoints(fit)$distimes$DT90) - } else { - results <- fit$bparms.optim - } - if (!missing(round_results)) results <- round(results, round_results) - dev.percent[[i]] <- abs(100 * ((reference - results)/reference)) - } - return(dev.percent) -} - -SFO <- mkinmod(parent = list(type = "SFO")) -FOMC <- mkinmod(parent = list(type = "FOMC")) -DFOP <- mkinmod(parent = list(type = "DFOP")) -HS <- mkinmod(parent = list(type = "HS")) -SFORB <- mkinmod(parent = list(type = "SFORB")) - -test_that("Fits for FOCUS A deviate less than 0.1% from median of values from FOCUS report", { - fit.A.SFO <- list(mkinfit("SFO", FOCUS_2006_A, quiet = TRUE)) - - median.A.SFO <- as.numeric(lapply(subset(FOCUS_2006_SFO_ref_A_to_F, - dataset == "A", - c(M0, k, DT50, DT90)), "median")) - - dev.percent.A.SFO <- calc_dev.percent(fit.A.SFO, median.A.SFO) - expect_equivalent(dev.percent.A.SFO[[1]] < 0.1, rep(TRUE, 4)) - - # Fitting FOCUS A with FOMC is possible, but the correlation between - # alpha and beta, when obtained, is 1.0000, and the fit does not - # always converge using the Port algorithm (platform dependent), so - # we need to suppress a potential warning - suppressWarnings(fit.A.FOMC <- try(list(mkinfit("FOMC", FOCUS_2006_A, quiet = TRUE)))) - - if (!inherits(fit.A.FOMC, "try-error")) { - - median.A.FOMC <- as.numeric(lapply(subset(FOCUS_2006_FOMC_ref_A_to_F, - dataset == "A", - c(M0, alpha, beta, DT50, DT90)), "median")) - - dev.percent.A.FOMC <- calc_dev.percent(fit.A.FOMC, median.A.FOMC) - # alpha and are beta ill-determined, do not compare those - expect_equivalent(dev.percent.A.FOMC[[1]][c(1, 4, 5)] < 0.1, rep(TRUE, 3)) - } - - fit.A.DFOP <- list(mkinfit("DFOP", FOCUS_2006_A, quiet = TRUE)) - - median.A.DFOP <- as.numeric(lapply(subset(FOCUS_2006_DFOP_ref_A_to_B, - dataset == "A", - c(M0, k1, k2, f, DT50, DT90)), "median")) - - dev.percent.A.DFOP <- calc_dev.percent(fit.A.DFOP, median.A.DFOP) - #expect_equivalent(dev.percent.A.DFOP[[1]] < 0.1, rep(TRUE, 6)) # g/f is ill-determined - expect_equivalent(dev.percent.A.DFOP[[1]][c(1, 2, 3, 5, 6)] < 0.1, rep(TRUE, 5)) - - fit.A.HS <- list(mkinfit("HS", FOCUS_2006_A, quiet = TRUE)) - - median.A.HS <- as.numeric(lapply(subset(FOCUS_2006_HS_ref_A_to_F, - dataset == "A", - c(M0, k1, k2, tb, DT50, DT90)), "median")) - - dev.percent.A.HS <- calc_dev.percent(fit.A.HS, median.A.HS) - expect_equivalent(dev.percent.A.HS[[1]] < 0.1, rep(TRUE, 6)) -}) - -test_that("Fits for FOCUS B deviate less than 0.1% from median of values from FOCUS report", { - skip_on_cran() - fit.B.SFO <- list(mkinfit("SFO", FOCUS_2006_B, quiet = TRUE)) - - median.B.SFO <- as.numeric(lapply(subset(FOCUS_2006_SFO_ref_A_to_F, - dataset == "B", - c(M0, k, DT50, DT90)), "median")) - - dev.percent.B.SFO <- calc_dev.percent(fit.B.SFO, median.B.SFO) - expect_equivalent(dev.percent.B.SFO[[1]] < 0.1, rep(TRUE, 4)) - - fit.B.FOMC <- list(mkinfit("FOMC", FOCUS_2006_B, quiet = TRUE)) - - median.B.FOMC <- as.numeric(lapply(subset(FOCUS_2006_FOMC_ref_A_to_F, - dataset == "B", - c(M0, alpha, beta, DT50, DT90)), "median")) - - dev.percent.B.FOMC <- calc_dev.percent(fit.B.FOMC, median.B.FOMC) - expect_equivalent(dev.percent.B.FOMC[[1]] < 0.1, rep(TRUE, 5)) - - fit.B.DFOP <- list(mkinfit("DFOP", FOCUS_2006_B, quiet = TRUE)) - - median.B.DFOP <- as.numeric(lapply(subset(FOCUS_2006_DFOP_ref_A_to_B, - dataset == "B", - c(M0, k1, k2, f, DT50, DT90)), "median")) - - dev.percent.B.DFOP <- calc_dev.percent(fit.B.DFOP, median.B.DFOP) - #expect_equivalent(dev.percent.B.DFOP[[1]] < 0.1, rep(TRUE, 6)) # g/f is ill-determined - expect_equivalent(dev.percent.B.DFOP[[1]][c(1, 2, 3, 5, 6)] < 0.1, rep(TRUE, 5)) - - fit.B.HS <- list(mkinfit("HS", FOCUS_2006_B, quiet = TRUE)) - - median.B.HS <- as.numeric(lapply(subset(FOCUS_2006_HS_ref_A_to_F, - dataset == "B", - c(M0, k1, k2, tb, DT50, DT90)), - "median", na.rm = TRUE)) - - dev.percent.B.HS <- calc_dev.percent(fit.B.HS, median.B.HS) - expect_equivalent(dev.percent.B.HS[[1]] < 0.1, rep(TRUE, 6)) - - fit.B.SFORB <- list(mkinfit(SFORB, FOCUS_2006_B, quiet=TRUE)) - dev.percent.B.SFORB <- calc_dev.percent(fit.B.SFORB, median.B.DFOP) - expect_equivalent(dev.percent.B.SFORB[[1]][c(1, 5, 6)] < 0.1, rep(TRUE, 3)) -}) - -test_that("Fits for FOCUS C deviate less than 0.1% from median of values from FOCUS report", { - fit.C.SFO <- list(mkinfit("SFO", FOCUS_2006_C, quiet = TRUE)) - - median.C.SFO <- as.numeric(lapply(subset(FOCUS_2006_SFO_ref_A_to_F, - dataset == "C", - c(M0, k, DT50, DT90)), "median")) - - dev.percent.C.SFO <- calc_dev.percent(fit.C.SFO, median.C.SFO) - expect_equivalent(dev.percent.C.SFO[[1]] < 0.1, rep(TRUE, 4)) - - fit.C.FOMC <- list(mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE)) - - median.C.FOMC <- as.numeric(lapply(subset(FOCUS_2006_FOMC_ref_A_to_F, - dataset == "C", - c(M0, alpha, beta, DT50, DT90)), "median")) - - dev.percent.C.FOMC <- calc_dev.percent(fit.C.FOMC, median.C.FOMC, - round_results = 2) # Not enough precision in FOCUS results - expect_equivalent(dev.percent.C.FOMC[[1]] < 0.1, rep(TRUE, 5)) - - fit.C.HS <- list(mkinfit("HS", FOCUS_2006_C, quiet = TRUE)) - - median.C.HS <- as.numeric(lapply(subset(FOCUS_2006_HS_ref_A_to_F, - dataset == "C", - c(M0, k1, k2, tb, DT50, DT90)), "median")) - - dev.percent.C.HS <- calc_dev.percent(fit.C.HS, median.C.HS, round_results = c(2, 4, 6, 2, 2)) - # Not enouth precision in k2 available - expect_equivalent(dev.percent.C.HS[[1]] < c(0.1, 0.1, 0.3, 0.1, 0.1, 0.1), rep(TRUE, 6)) -}) - -test_that("SFO fits give approximately (0.001%) equal results with different solution methods", { - skip_on_cran() - fit.A.SFO.default <- mkinfit("SFO", FOCUS_2006_A, quiet = TRUE)$bparms.optim - - fits.A.SFO <- list() - fits.A.SFO[[1]] <- mkinfit(SFO, FOCUS_2006_A, quiet = TRUE) - fits.A.SFO[[2]] <- mkinfit(SFO, FOCUS_2006_A, quiet = TRUE, solution_type = "eigen") - fits.A.SFO[[3]] <- mkinfit(SFO, FOCUS_2006_A, quiet = TRUE, solution_type = "deSolve") - - dev.percent <- calc_dev.percent(fits.A.SFO, fit.A.SFO.default, endpoints = FALSE) - expect_equivalent(dev.percent[[1]] < 0.001, rep(TRUE, 2)) - expect_equivalent(dev.percent[[2]] < 0.001, rep(TRUE, 2)) - expect_equivalent(dev.percent[[3]] < 0.001, rep(TRUE, 2)) -}) - -test_that("FOMC fits give approximately (0.001%) equal results with different solution methods", { - skip_on_cran() - fit.C.FOMC.default <- mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE)$bparms.optim - - fits.C.FOMC <- list() - fits.C.FOMC[[1]] <- mkinfit(FOMC, FOCUS_2006_C, quiet = TRUE) - fits.C.FOMC[[2]] <- mkinfit(FOMC, FOCUS_2006_C, quiet = TRUE, solution_type = "deSolve") - - dev.percent <- calc_dev.percent(fits.C.FOMC, fit.C.FOMC.default, endpoints = FALSE) - expect_equivalent(dev.percent[[1]] < 0.001, rep(TRUE, 3)) - expect_equivalent(dev.percent[[2]] < 0.001, rep(TRUE, 3)) -}) - -test_that("DFOP fits give approximately (0.001%) equal results with different solution methods", { - skip_on_cran() - fit.C.DFOP.default <- mkinfit("DFOP", FOCUS_2006_C, quiet = TRUE)$bparms.optim - - fits.C.DFOP <- list() - fits.C.DFOP[[1]] <- mkinfit(DFOP, FOCUS_2006_C, quiet = TRUE) - fits.C.DFOP[[2]] <- mkinfit(DFOP, FOCUS_2006_C, quiet = TRUE, solution_type = "deSolve") - - dev.percent <- calc_dev.percent(fits.C.DFOP, fit.C.DFOP.default, endpoints = FALSE) - expect_equivalent(dev.percent[[1]] < 0.001, rep(TRUE, 4)) - expect_equivalent(dev.percent[[2]] < 0.001, rep(TRUE, 4)) -}) - -test_that("SFORB fits give approximately (0.002%) equal results with different solution methods", { - skip_on_cran() - fit.B.SFORB.default <- mkinfit(SFORB, FOCUS_2006_B, quiet=TRUE)$bparms.optim - - fits.B.SFORB <- list() - fits.B.SFORB[[1]] <- mkinfit(SFORB, FOCUS_2006_B, quiet=TRUE, solution_type = "eigen") - fits.B.SFORB[[2]] <- mkinfit(SFORB, FOCUS_2006_B, quiet=TRUE, solution_type = "deSolve") - dev.percent <- calc_dev.percent(fits.B.SFORB, fit.B.SFORB.default, endpoints = FALSE) - expect_equivalent(dev.percent[[1]] < 0.001, rep(TRUE, 4)) - expect_equivalent(dev.percent[[2]] < 0.002, rep(TRUE, 4)) -}) -- cgit v1.2.1