From 5b3daf393831acc4099e1bde3fe4527993529d74 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 18 Oct 2017 11:28:39 +0200 Subject: Version 3.2 --- CakeCost.R | 225 ++++++++++++++++------------------- CakeErrMin.R | 91 ++++++++++++++ CakeHelpers.R | 118 ++++++++++++++++++ CakeInit.R | 21 ++-- CakeIrlsFit.R | 175 +++++++++++++++------------ CakeMcmcFit.R | 361 ++++++++++++++++++++++++++------------------------------ CakeModel.R | 287 ++++++++++++++++---------------------------- CakeNullPlot.R | 167 +++++++++++++------------- CakeOlsFit.R | 318 ++++++++++++------------------------------------- CakePenalties.R | 70 +++++------ CakePlot.R | 222 ++++++++++++++++++++++++++++++++++ CakeSolutions.R | 42 +++++++ CakeSummary.R | 330 +++++++++++++++++++++++++++++++-------------------- 13 files changed, 1352 insertions(+), 1075 deletions(-) create mode 100755 CakeErrMin.R create mode 100755 CakeHelpers.R create mode 100755 CakePlot.R create mode 100755 CakeSolutions.R diff --git a/CakeCost.R b/CakeCost.R index 40635f0..8fb94ef 100644 --- a/CakeCost.R +++ b/CakeCost.R @@ -1,15 +1,14 @@ ## ----------------------------------------------------------------------------- ## The model cost and residuals ## ----------------------------------------------------------------------------- -# The CAKE R modules are based on mkin, +# Some of the CAKE R modules are based on mkin. # Call to approx is only performed if there are multiple non NA values # which should prevent most of the crashes - Rob Nelson (Tessella) # -# Modifications developed by Tessella Plc for Syngenta, Copyright (C) 2011 Syngenta -# Authors: Rob Nelson, Richard Smith -# Tessella Project Reference: 6245 +# Modifications developed by Tessella for Syngenta, Copyright (C) 2011-2016 Syngenta +# Tessella Project Reference: 6245, 7247, 8361, 7414 # -# This program is free software: you can +# The CAKE R modules are free software: you can # redistribute them and/or modify them under the # terms of the GNU General Public License as published by the Free Software # Foundation, either version 3 of the License, or (at your option) any later @@ -21,9 +20,7 @@ # details. # # You should have received a copy of the GNU General Public License along with -# this program. If not, see # -# -# $Id$ +# this program. If not, see CakeCost <- function (model, obs, x = "time", y = NULL, err = NULL, weight = "none", scaleVar = FALSE, cost = NULL, ...) { @@ -180,23 +177,6 @@ CakeCost <- function (model, obs, x = "time", y = NULL, err = NULL, Res <- (ModVar- obsdat) res <- Res/Err resScaled <- res*Scale - - # print("==========================="); - # print("#Names[i]"); - # print(Names[i]); - # print("xDat"); - # print(xDat); - # print("obsdat"); - # print(obsdat); - # print("ModVar"); - # print(ModVar); - # print("1/Err"); - # print(1/Err); - # print("Res"); - # print(Res); - # print("res"); - # print(res); - # print("==========================="); Residual <- rbind(Residual, data.frame( @@ -217,9 +197,6 @@ CakeCost <- function (model, obs, x = "time", y = NULL, err = NULL, SSR.unscaled = sum(res^2), SSR = sum(resScaled^2))) - # print("************************") - # print(CostVar) - # print("************************") } # end loop over all observed variables ## SSR @@ -268,103 +245,107 @@ plot.modCost<- function(x, legpos="topleft", ...) { ## Internal cost function for optimisers ## ----------------------------------------------------------------------------- # Cost function. The returned structure must have $model -# Called from the middle of IrlsFit and OlsFit # We need to preserve state between calls so make a closure CakeInternalCostFunctions <- function(mkinmod, state.ini.optim, state.ini.optim.boxnames, - state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, - quiet, plot=FALSE, atol=1e-6){ + state.ini.fixed, parms.fixed, observed, mkindiff, + quiet, atol=1e-6, solution="deSolve", err="err"){ cost.old <- 1e+100 calls <- 0 out_predicted <- NA - - get.predicted <- function(){ out_predicted } - - get.best.cost <- function(){ cost.old } - reset.best.cost <- function() { cost.old<<-1e+100 } - - get.calls <- function(){ calls } - set.calls <- function(newcalls){ calls <<- newcalls } - - set.error<-function(err) { observed$err <<- err } - - cost <- function(P) { - assign("calls", calls + 1, inherits = TRUE) - print(P) - if (length(state.ini.optim) > 0) { - odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) - names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) - } - else odeini <- state.ini.fixed - odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], - parms.fixed) - # Ensure initial state is at time 0 - outtimes = unique(c(0,observed$time)) - out <- ode(y = odeini, times = outtimes, func = mkindiff, - parms = odeparms, atol=atol) - out_transformed <- data.frame(time = out[, "time"]) - for (var in names(mkinmod$map)) { - if (length(mkinmod$map[[var]]) == 1) { - out_transformed[var] <- out[, var] - } - else { - out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) - } - } - assign("out_predicted", out_transformed, inherits = TRUE) - mC <- CakeCost(out_transformed, observed, y = "value", - err = 'err', scaleVar = scaleVar) - mC$penalties <- CakePenalties(odeparms, out_transformed, observed) - mC$model <- mC$cost + mC$penalties; - if (mC$model < cost.old) { - if (!quiet) - cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, - "\n") - if (plot) { - outtimes_plot = seq(min(observed$time), max(observed$time), - length.out = 100) - out_plot <- ode(y = odeini, times = outtimes_plot, - func = mkindiff, parms = odeparms, atol=atol) - out_transformed_plot <- data.frame(time = out_plot[, - "time"]) - for (var in names(mkinmod$map)) { - if (length(mkinmod$map[[var]]) == 1) { - out_transformed_plot[var] <- out_plot[, var] - } - else { - out_transformed_plot[var] <- rowSums(out_plot[, - mkinmod$map[[var]]]) - } - } - plot(0, type = "n", xlim = range(observed$time), - ylim = range(observed$value, na.rm = TRUE), - xlab = "Time", ylab = "Observed") - col_obs <- pch_obs <- 1:length(obs_vars) - names(col_obs) <- names(pch_obs) <- obs_vars - for (obs_var in obs_vars) { - points(subset(observed, name == obs_var, c(time, - value)), pch = pch_obs[obs_var], col = col_obs[obs_var]) - } - matlines(out_transformed_plot$time, out_transformed_plot[-1]) - legend("topright", inset = c(0.05, 0.05), legend = obs_vars, - col = col_obs, pch = pch_obs, lty = 1:length(pch_obs)) - } - assign("cost.old", mC$model, inherits = TRUE) - } - # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost - if(mC$penalties > 0){ - #cat("Penalty adjustment: ",mC$penalties) - mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res) - } - - return(mC) - } - - - - list(cost=cost, - get.predicted=get.predicted, - get.calls=get.calls, set.calls=set.calls, - get.best.cost=get.best.cost, reset.best.cost=reset.best.cost, - set.error=set.error - ) + + get.predicted <- function(){ out_predicted } + + get.best.cost <- function(){ cost.old } + reset.best.cost <- function() { cost.old<<-1e+100 } + + get.calls <- function(){ calls } + set.calls <- function(newcalls){ calls <<- newcalls } + + set.error<-function(err) { observed$err <<- err } + + # The called cost function + cost <- function(P) { + assign("calls", calls + 1, inherits = TRUE) + print(P) + + if (length(state.ini.optim) > 0) { + odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) + names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) + } else { + odeini <- state.ini.fixed + } + + odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed) + + # Ensure initial state is at time 0 + outtimes = unique(c(0,observed$time)) + + odeini <- AdjustOdeInitialValues(odeini, mkinmod, odeparms) + + if (solution == "analytical") { + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } + + parent.type = names(mkinmod$map[[1]])[1] + parent.name = names(mkinmod$diffs)[[1]] + o <- switch(parent.type, + SFO = SFO.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, sep="_"))), + FOMC = FOMC.solution(outtimes, + evalparse(parent.name), + evalparse("alpha"), evalparse("beta")), + DFOP = DFOP.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k1", parent.name, sep="_")), + evalparse(paste("k2", parent.name, sep="_")), + evalparse(paste("g", parent.name, sep="_"))), + HS = HS.solution(outtimes, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("tb")), + IORE = IORE.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, sep="_")), + evalparse("N"))) + + out <- cbind(outtimes, o) + dimnames(out) <- list(outtimes, c("time", sub("_free", "", parent.name))) + } + if (solution == "deSolve") + { + out <- ode(y = odeini, times = outtimes, func = mkindiff, parms = odeparms, atol = atol) + } + + out_transformed <- PostProcessOdeOutput(out, mkinmod, atol) + + assign("out_predicted", out_transformed, inherits = TRUE) + mC <- CakeCost(out_transformed, observed, y = "value", err = err, scaleVar = FALSE) + mC$penalties <- CakePenalties(odeparms, out_transformed, observed) + mC$model <- mC$cost + mC$penalties + + if (mC$model < cost.old) { + if (!quiet) { + cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, "\n") + } + + assign("cost.old", mC$model, inherits = TRUE) + } + + # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost + if(mC$penalties > 0){ + mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res) + } + + return(mC) + } + + list(cost=cost, + get.predicted=get.predicted, + get.calls=get.calls, set.calls=set.calls, + get.best.cost=get.best.cost, reset.best.cost=reset.best.cost, + set.error=set.error + ) } \ No newline at end of file diff --git a/CakeErrMin.R b/CakeErrMin.R new file mode 100755 index 0000000..9a074a4 --- /dev/null +++ b/CakeErrMin.R @@ -0,0 +1,91 @@ +## ----------------------------------------------------------------------------- +## Chi squared errmin function. +## ----------------------------------------------------------------------------- +# Some of the CAKE R modules are based on mkin. +# +# Modifications developed by Tessella for Syngenta, Copyright (C) 2011-2016 Syngenta +# Tessella Project Reference: 6245, 7247, 8361, 7414 +# +# The CAKE R modules are free software: you can +# redistribute them and/or modify them under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see + +if(getRversion() >= '2.15.1') utils::globalVariables(c("name")) + +CakeErrMin <- function(fit, alpha = 0.05) +{ + parms.optim <- fit$par + kinerrmin <- function(errdata, n.parms) { + means.mean <- mean(errdata$value_mean, na.rm=TRUE) + df = length(errdata$value_mean) - n.parms + + err.min <- sqrt((1 / qchisq(1 - alpha, df)) * + sum((errdata$value_mean - errdata$value_pred)^2)/(means.mean^2)) + + return(list(err.min = err.min, n.optim = n.parms, df = df)) + } + + means <- aggregate(value ~ time + name, data = fit$observed, mean, na.rm=TRUE) + errdata <- merge(means, fit$predicted, by = c("time", "name"), + suffixes = c("_mean", "_pred")) + errdata <- errdata[order(errdata$time, errdata$name), ] + + # Remove values at time zero for variables whose value for state.ini is fixed, + # as these will not have any effect in the optimization and should therefore not + # be counted as degrees of freedom. + fixed_initials = gsub("_0$", "", rownames(subset(fit$fixed, type = "state"))) + errdata <- subset(errdata, !(time == 0 & name %in% fixed_initials)) + + n.optim.overall <- length(parms.optim) + + errmin.overall <- kinerrmin(errdata, n.optim.overall) + errmin <- data.frame(err.min = errmin.overall$err.min, + n.optim = errmin.overall$n.optim, df = errmin.overall$df) + rownames(errmin) <- "All data" + + for (obs_var in fit$obs_vars) + { + errdata.var <- subset(errdata, name == obs_var) + + # Check if initial value is optimised + n.initials.optim <- length(grep(paste(obs_var, ".*", "_0", sep=""), names(parms.optim))) + + # Rate constants and DFOP parameters are attributed to the source variable + n.k.optim <- length(grep(paste("^k", obs_var, sep="_"), names(parms.optim))) + n.k1.dfop.optim <- length(grep(paste("^k1", obs_var, sep="_"), names(parms.optim))) + n.k2.dfop.optim <- length(grep(paste("^k2", obs_var, sep="_"), names(parms.optim))) + n.g.dfop.optim <- length(grep(paste("^g", obs_var, sep="_"), names(parms.optim))) + + # Formation fractions are attributed to the target variable + n.ff.optim <- length(grep(paste("^f", ".*", obs_var, "$", sep=""), names(parms.optim))) + + n.optim <- n.k.optim + n.k1.dfop.optim + n.k2.dfop.optim + n.g.dfop.optim + n.initials.optim + n.ff.optim + + # FOMC, HS and IORE parameters are only counted if we are looking at the + # first variable in the model which is always the source variable + if (obs_var == fit$obs_vars[[1]]) { + if ("alpha" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("beta" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("k1" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("k2" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("tb" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("N" %in% names(parms.optim)) n.optim <- n.optim + 1 + } + + # Calculate and add a line to the results + errmin.tmp <- kinerrmin(errdata.var, n.optim) + errmin[obs_var, c("err.min", "n.optim", "df")] <- errmin.tmp + } + + return(errmin) +} diff --git a/CakeHelpers.R b/CakeHelpers.R new file mode 100755 index 0000000..5af51cc --- /dev/null +++ b/CakeHelpers.R @@ -0,0 +1,118 @@ +# $Id$ +# Some of the CAKE R modules are based on mkin, +# Developed by Tessella Ltd for Syngenta: Copyright (C) 2011-2016 Syngenta +# Tessella Project Reference: 6245, 7247, 8361, 7414 + +# The CAKE R modules are free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# Shifts parameters slightly away from boundaries specified in "lower" and +# "upper" (to avoid computational issues after parameter transforms in modFit). +ShiftAwayFromBoundaries <- function(parameters, lower, upper) { + parametersOnLowerBound = which(parameters == lower) + parameters[parametersOnLowerBound] <- parameters[parametersOnLowerBound] * (1 + .Machine$double.eps) + .Machine$double.xmin + + parametersOnUpperBound = which(parameters == upper) + parameters[parametersOnUpperBound] <- parameters[parametersOnUpperBound] * (1 - .Machine$double.neg.eps) - .Machine$double.xmin + + return(parameters) +} + +# Adjusts stated initial values to put into the ODE solver. +# +# odeini: The initial values to adjust (in the form that would be fed into the ode function). +# cake.model: The expression of the model that we are solving. +# odeparms: The parameters for the ODE (in the form that would be fed into the ode function). +# +# Returns: Adjusted initial values. +AdjustOdeInitialValues <- function(odeini, cake.model, odeparms) { + odeini.names <- names(odeini) + + for (ini.name in odeini.names) { + # For DFOP metabolites in two compartments, need to calculate some initial conditions for the ODEs. + if (!(ini.name %in% names(cake.model$diffs))){ + subcompartment1.name <- paste(ini.name, "1", sep="_") + subcompartment2.name <- paste(ini.name, "2", sep="_") + + if (subcompartment1.name %in% names(cake.model$diffs) && subcompartment2.name %in% names(cake.model$diffs)){ + g.parameter.name = paste("g", ini.name, sep="_") + + odeini[[subcompartment1.name]] <- odeini[[ini.name]] * odeparms[[g.parameter.name]] + odeini[[subcompartment2.name]] <- odeini[[ini.name]] * (1 - odeparms[[g.parameter.name]]) + } + } + } + + # It is important that these parameters are stated in the same order as the differential equations. + return(odeini[names(cake.model$diffs)]) +} + +# Post-processes the output from the ODE solver (or analytical process), including recombination of sub-compartments. +# +# odeoutput: The output of the ODE solver. +# cake.model: The expression of the model that we are solving. +# atol: The tolerance to which the solution has been calculated. +# +# Returns: Post-processed/transformed ODE output. +PostProcessOdeOutput <- function(odeoutput, cake.model, atol) { + out_transformed <- data.frame(time = odeoutput[, "time"]) + + # Replace values that are incalculably small with 0. + for (col.name in colnames(odeoutput)) { + if (col.name == "time") { + next + } + + # If we have non-NaN, positive outputs... + if (length(odeoutput[, col.name][!is.nan(odeoutput[, col.name]) && odeoutput[, col.name] > 0]) > 0) { + # ...then replace the NaN outputs. + odeoutput[, col.name][is.nan(odeoutput[, col.name])] <- 0 + } + + # Round outputs smaller than the used tolerance down to 0. + odeoutput[, col.name][odeoutput[, col.name] < atol] <- 0 + } + + # Re-combine sub-compartments (if required) + for (compartment.name in names(cake.model$map)) { + if (length(cake.model$map[[compartment.name]]) == 1) { + out_transformed[compartment.name] <- odeoutput[, compartment.name] + } else { + out_transformed[compartment.name] <- rowSums(odeoutput[, cake.model$map[[compartment.name]]]) + } + } + + return(out_transformed) +} + +# Reorganises data in a wide format to a long format. +# +# wide_data: The data in wide format. +# time: The name of the time variable in wide_data (default "t"). +# +# Returns: Reorganised data. +wide_to_long <- function(wide_data, time = "t") { + colnames <- names(wide_data) + + if (!(time %in% colnames)) { + stop("The data in wide format have to contain a variable named ", time, ".") + } + + vars <- subset(colnames, colnames != time) + n <- length(colnames) - 1 + long_data <- data.frame(name = rep(vars, each = length(wide_data[[time]])), + time = as.numeric(rep(wide_data[[time]], n)), value = as.numeric(unlist(wide_data[vars])), + row.names = NULL) + + return(long_data) +} \ No newline at end of file diff --git a/CakeInit.R b/CakeInit.R index 0adab40..a23ad95 100644 --- a/CakeInit.R +++ b/CakeInit.R @@ -2,11 +2,11 @@ # Purpose: Load the modules required by CAKE # Call with chdir=TRUE so it can load our source from the current directory -# Tessella Project Reference: 6245 +# Tessella Project Reference: 6245, 7247, 8361, 7414 -# Copyright (C) 2011 Syngenta +# Copyright (C) 2011-2016 Syngenta -# This program is free software: you can redistribute it and/or modify +# The CAKE R modules are free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. @@ -17,24 +17,29 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program. If not, see .” +# along with this program. If not, see . -library(mkin) +library(FME) library(Rsolnp) source("CakeModel.R") source("CakeOlsFit.R") source("CakeIrlsFit.R") -source("CakeOlsPlot.R") -source("CakeIrlsPlot.R") +source("CakePlot.R") source("CakeNullPlot.R") source("CakeCost.R") source("CakePenalties.R") source("CakeMcmcFit.R") source("CakeSummary.R") +source("CakeErrMin.R") +source("CakeHelpers.R") +source("CakeSolutions.R") options(width=1000) options(error=recover) options(warn=-1) # When running from C#, this must match the C# CAKE version -CAKE.version<-"2.0" +CAKE.version<-"3.2" + +# The number of data points to use to draw lines on CAKE plots. +CAKE.plots.resolution<-401 diff --git a/CakeIrlsFit.R b/CakeIrlsFit.R index 05eff0a..92c45da 100644 --- a/CakeIrlsFit.R +++ b/CakeIrlsFit.R @@ -1,11 +1,10 @@ #$Id$ # -# The CAKE R modules are based on mkin -# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta -# Authors: Rob Nelson, Richard Smith, Tamar Christina -# Tessella Project Reference: 6245, 7247 +# Some of the CAKE R modules are based on mkin +# Modifications developed by Tessella for Syngenta: Copyright (C) 2011-2016 Syngenta +# Tessella Project Reference: 6245, 7247, 8361, 7414 -# This program is free software: you can redistribute it and/or modify +# The CAKE R modules are free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. @@ -16,31 +15,52 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program. If not, see .” +# along with this program. If not, see . # -CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)), - state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0, - upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1], - plot = FALSE, quiet = FALSE, err = NULL, weight = "none", - scaleVar = FALSE, atol=1e-6, sannMaxIter = 10000, control=list(), - useSolnp = FALSE, method='L-BFGS-B',...) -{ -### This is a modification based on the "mkinfit" function. -### version 0.1 July 20 -### -# This version has been modified to expect SFO parameterised as k and flow fractions -# Based on code in IRLSkinfit +# Performs an iteratively-reweighted least squares fit on a given CAKE model. +# Remark: this function was originally based on the "mkinfit" function, version 0.1. +# +# cake.model: The model to perform the fit on (as generated by CakeModel.R). +# observed: Observation data to fit to. +# parms.ini: Initial values for the parameters being fitted. +# state.ini: Initial state (i.e. initial values for concentration, the dependent variable being modelled). +# lower: Lower bounds to apply to parameters. +# upper: Upper bound to apply to parameters. +# fixed_parms: A vector of names of parameters that are fixed to their initial values. +# fixed_initials: A vector of compartments with fixed initial concentrations. +# quiet: Whether the internal cost functions should execute more quietly than normal (less output). +# atol: The tolerance to apply to the ODE solver. +# sannMaxIter: The maximum number of iterations to apply to SANN processes. +# control: ... +# useExtraSolver: Whether to use the extra solver for this fit. +CakeIrlsFit <- function (cake.model, + observed, + parms.ini = rep(0.1, length(cake.model$parms)), + state.ini = c(100, rep(0, length(cake.model$diffs) - 1)), + lower = 0, + upper = Inf, + fixed_parms = NULL, + fixed_initials = names(cake.model$diffs)[-1], + quiet = FALSE, + atol=1e-6, + sannMaxIter = 10000, + control=list(), + useExtraSolver = FALSE, + ...) +{ NAind <-which(is.na(observed$value)) - mod_vars <- names(mkinmod$diffs) - observed <- subset(observed, name %in% names(mkinmod$map)) + mod_vars <- names(cake.model$diffs) + observed <- subset(observed, name %in% names(cake.model$map)) ERR <- rep(1,nrow(observed)) observed <- cbind(observed,err=ERR) - flag <- 0 + fitted_with_extra_solver <- 0 obs_vars = unique(as.character(observed$name)) - if (is.null(names(parms.ini))) - names(parms.ini) <- mkinmod$parms + + if (is.null(names(parms.ini))) { + names(parms.ini) <- cake.model$parms + } mkindiff <- function(t, state, parms) { time <- t @@ -48,12 +68,14 @@ CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$ for (box in mod_vars) { diffname <- paste("d", box, sep = "_") diffs[diffname] <- with(as.list(c(time, state, parms)), - eval(parse(text = mkinmod$diffs[[box]]))) + eval(parse(text = cake.model$diffs[[box]]))) } return(list(c(diffs))) } - if (is.null(names(state.ini))) + + if (is.null(names(state.ini))) { names(state.ini) <- mod_vars + } parms.fixed <- parms.ini[fixed_parms] optim_parms <- setdiff(names(parms.ini), fixed_parms) @@ -67,15 +89,15 @@ CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$ names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep = "_") } - - costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames, - state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol) - + + costFunctions <- CakeInternalCostFunctions(cake.model, state.ini.optim, state.ini.optim.boxnames, + state.ini.fixed, parms.fixed, observed, mkindiff, quiet, atol=atol) + ############### Iteratively Reweighted Least Squares############# ## Start with no weighting ## a prefitting step since this is usually the most effective method - if(useSolnp) + if(useExtraSolver) { pnames=names(c(state.ini.optim, parms.optim)) fn <- function(P){ @@ -83,28 +105,28 @@ CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$ FF<<-costFunctions$cost(P) return(FF$model)} a <- try(fit <- solnp(c(state.ini.optim, parms.optim),fun=fn,LB=lower,UB=upper,control=control),silent=TRUE) - #optimmethod <- method0 - flag <- 1 + fitted_with_extra_solver <- 1 if(class(a) == "try-error") { print('solnp fails, try PORT or other algorithm by users choice, might take longer time. Do something else!') warning('solnp fails, switch to PORT or other algorithm by users choice') ## now using submethod already a <- try(fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='Port',control=control)) - flag <- 0 + fitted_with_extra_solver <- 0 if(class(a) == "try-error") { - fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method=method,control=control) + fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='L-BFGS-B',control=control) } } - }else{ - fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, + }else{ + # modFit parameter transformations can explode if you put in parameters that are equal to a bound, so we move them away by a tiny amount. + all.optim <- ShiftAwayFromBoundaries(c(state.ini.optim, parms.optim), lower, upper) + + fit <- modFit(costFunctions$cost, all.optim, lower = lower, upper = upper,control=control,...) } - ## print(fit$hessian) - if(length(control)==0) { irls.control <- list(maxIter=10,tol=1e-05) @@ -122,15 +144,15 @@ CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$ tol <- irls.control$tol #### - if(length(mkinmod$map)==1 || useSolnp){ + if(length(cake.model$map)==1){ ## there is only one parent just do one iteration: maxIter <- 0 - if(flag==1)## fit from solnp + if(fitted_with_extra_solver==1)## managed to fit with extra solver { fit$ssr <- fit$values[length(fit$values)] fit$residuals <-FF$residual$res - ## mean square per varaible + ## mean square per variable if (class(FF) == "modCost") { names(fit$residuals) <- FF$residuals$name fit$var_ms <- FF$var$SSR/FF$var$N @@ -149,14 +171,14 @@ CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$ niter <- 1 ## insure one IRLS iteration diffsigma <- 100 - olderr <- rep(1,length(mod_vars)) + olderr <- rep(1,length(cake.model$map)) while(diffsigma>tol & niter<=maxIter) - { - if(flag==1 && useSolnp)## fit from solnp + { + if(fitted_with_extra_solver==1 && useExtraSolver)## managed to fit with extra solver { fit$ssr <- fit$values[length(fit$values)] fit$residuals <-FF$residual$res - ## mean square per varaible + ## mean square per variable if (class(FF) == "modCost") { names(fit$residuals) <- FF$residuals$name fit$var_ms <- FF$var$SSR/FF$var$N @@ -168,7 +190,6 @@ CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$ } else fit$var_ms <- fit$var_ms_unweighted <- fit$var_ms_unscaled <- NA } - err <- sqrt(fit$var_ms_unweighted) ERR <- err[as.character(observed$name)] costFunctions$set.error(ERR) @@ -176,31 +197,32 @@ CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$ cat("IRLS iteration at",niter, "; Diff in error variance ", diffsigma,"\n") olderr <- err - if(useSolnp) + if(useExtraSolver) { - flag <- 1 + fitted_with_extra_solver <- 1 a <- try(fit <- solnp(fit$par,fun=fn,LB=lower,UB=upper,control=control),silent=TRUE) if(class(a) == "try-error") { - flag <- 0 + fitted_with_extra_solver <- 0 print('solnp fails during IRLS iteration, try PORT or other algorithm by users choice.This may takes a while. Do something else!') ## NOTE: because in kingui we switch off the warnings, we need to print out the message instead. warning('solnp fails during IRLS iteration, switch to PORT or other algorithm by users choice') fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, method='Port',control=list()) } }else{ + # modFit parameter transformations can explode if you put in parameters that are equal to a bound, so we move them away by a tiny amount. + fit$par <- ShiftAwayFromBoundaries(fit$par, lower, upper) + fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, control=control, ...) } - - ## print(fit$hessian) niter <- niter+1 ### If not converged, reweight and fit - } + } - if(flag==1 && useSolnp){ + if(fitted_with_extra_solver==1 && useExtraSolver){ ## solnp used optimmethod <- 'solnp' fit$ssr <- fit$values[length(fit$values)] @@ -218,47 +240,50 @@ CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$ np <- length(c(state.ini.optim, parms.optim)) fit$rank <- np fit$df.residual <- length(fit$residuals) - fit$rank + + # solnp can return an incorrect Hessian, so we use another fitting method at the optimised point to determine the Hessian + fitForHessian <- modFit(costFunctions$cost, fit$par, lower=lower, upper=upper, method='L-BFGS-B', control=list()) + + fit$solnpHessian <- fit$hessian + fit$hessian <- fitForHessian$hessian } - - ########################################### + + ########################################### fit$mkindiff <- mkindiff - fit$map <- mkinmod$map - fit$diffs <- mkinmod$diffs - - out_predicted <- costFunctions$get.predicted() + fit$map <- cake.model$map + fit$diffs <- cake.model$diffs - # mkin_long_to_wide does not handle ragged data - fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") - names(fit$observed) <- c("time", as.vector(unique(observed$name))) + out_predicted <- costFunctions$get.predicted() - predicted_long <- mkin_wide_to_long(out_predicted, time = "time") + predicted_long <- wide_to_long(out_predicted, time = "time") fit$predicted <- out_predicted fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) fit$start$type = c(rep("state", length(state.ini.optim)), - rep("deparm", length(parms.optim))) + rep("deparm", length(parms.optim))) fit$start$lower <- lower fit$start$upper <- upper fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) fit$fixed$type = c(rep("state", length(state.ini.fixed)), - rep("deparm", length(parms.fixed))) + rep("deparm", length(parms.fixed))) - fit$errmin <- CakeChi2(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini) - parms.all = c(fit$par, parms.fixed) - fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed) + fit$errmin <- CakeChi2(cake.model, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fit$fixed) + parms.all = c(fit$par, parms.fixed, state.ini) + fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed) fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) - fit$extraDT50<- data.frame(DT50 = rep(NA, 2), row.names = c("k1", "k2")) + fit$extraDT50<- data.frame(k1 = rep(NA, length(names(cake.model$map))), k2 = rep(NA, length(names(cake.model$map))), row.names = names(cake.model$map)) - for (obs_var in obs_vars) { - type = names(mkinmod$map[[obs_var]])[1] - fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all,sannMaxIter) + for (compartment.name in names(cake.model$map)) { + type = names(cake.model$map[[compartment.name]])[1] + fit$distimes[compartment.name, ] = CakeDT(type,compartment.name,parms.all,sannMaxIter) + fit$extraDT50[compartment.name, ] = CakeExtraDT(type, compartment.name, parms.all) } - fit$extraDT50[ ,c("DT50")] = CakeExtraDT(type,parms.all) + fit$ioreRepDT = CakeIORERepresentativeDT("Parent", parms.all) + fit$fomcRepDT = CakeFOMCBackCalculatedDT(parms.all) data <- merge(observed, predicted_long, by = c("time", "name")) - names(data) <- c("time", "variable", "observed","err-var", "predicted") data$residual <- data$observed - data$predicted data$variable <- ordered(data$variable, levels = obs_vars) @@ -268,6 +293,8 @@ CakeIrlsFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$ fit$upper <- upper fit$lower <- lower + fit$atol <- atol + class(fit) <- c("CakeFit", "mkinfit", "modFit") return(fit) } diff --git a/CakeMcmcFit.R b/CakeMcmcFit.R index f9a340e..65c9431 100644 --- a/CakeMcmcFit.R +++ b/CakeMcmcFit.R @@ -1,11 +1,9 @@ -# $Id$ -# The CAKE R modules are based on mkin, +# Some of the CAKE R modules are based on mkin, # Based on mcmckinfit as modified by Bayer -# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta -# Author: Rob Nelson, Tamar Christina -# Tessella Project Reference: 6245, 7247 +# Modifications developed by Tessella for Syngenta: Copyright (C) 2011-2016 Syngenta +# Tessella Project Reference: 6245, 7247, 8361, 7414 -# This program is free software: you can redistribute it and/or modify +# The CAKE R modules are free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. @@ -16,168 +14,155 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program. If not, see .” +# along with this program. If not, see . -CakeMcmcFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)), - state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0, - upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1], - plot = FALSE, quiet = FALSE, err = NULL, weight = "none", - scaleVar = FALSE, commonsigma=FALSE,jump = NULL,prior = NULL, - wvar0=0.1,niter = 1000, - outputlength = niter, burninlength = 0, updatecov = niter, - ntrydr = 1, drscale = NULL, verbose = TRUE,fitstart=TRUE,seed=NULL,atol=1e-6, - sannMaxIter = 10000, control=list(), - useSolnp = FALSE, method='L-BFGS-B',...) +# Performs an Markov chain Monte Carlo least squares fit on a given CAKE model. +# +# cake.model: The model to perform the fit on (as generated by CakeModel.R). +# observed: Observation data to fit to. +# parms.ini: Initial values for the parameters being fitted. +# state.ini: Initial state (i.e. initial values for concentration, the dependent variable being modelled). +# lower: Lower bounds to apply to parameters. +# upper: Upper bound to apply to parameters. +# fixed_parms: A vector of names of parameters that are fixed to their initial values. +# fixed_initials: A vector of compartments with fixed initial concentrations. +# quiet: Whether the internal cost functions should execute more quietly than normal (less output). +# niter: The number of MCMC iterations to apply. +# atol: The tolerance to apply to the ODE solver. +# sannMaxIter: The maximum number of iterations to apply to SANN processes. +# control: ... +# useExtraSolver: Whether to use the extra solver for this fit (only used for the initial first fit). +CakeMcmcFit <- function (cake.model, + observed, + parms.ini, + state.ini, + lower, + upper, + fixed_parms = NULL, + fixed_initials, + quiet = FALSE, + niter = 1000, + verbose = TRUE, + seed=NULL, + atol=1e-6, + sannMaxIter = 10000, + control=list(), + useExtraSolver = FALSE, + ...) { - NAind <-which(is.na(observed$value)) - mod_vars <- names(mkinmod$diffs) - observed <- subset(observed, name %in% names(mkinmod$map)) + mod_vars <- names(cake.model$diffs) + observed <- subset(observed, name %in% names(cake.model$map)) ERR <- rep(1,nrow(observed)) observed <- cbind(observed,err=ERR) obs_vars = unique(as.character(observed$name)) - if (is.null(names(parms.ini))) - names(parms.ini) <- mkinmod$parms + + if (is.null(names(parms.ini))) { + names(parms.ini) <- cake.model$parms + } + mkindiff <- function(t, state, parms) { time <- t diffs <- vector() + for (box in mod_vars) { diffname <- paste("d", box, sep = "_") diffs[diffname] <- with(as.list(c(time, state, parms)), - eval(parse(text = mkinmod$diffs[[box]]))) + eval(parse(text = cake.model$diffs[[box]]))) } + return(list(c(diffs))) } - if (is.null(names(state.ini))) + + if (is.null(names(state.ini))) { names(state.ini) <- mod_vars + } + parms.fixed <- parms.ini[fixed_parms] optim_parms <- setdiff(names(parms.ini), fixed_parms) parms.optim <- parms.ini[optim_parms] state.ini.fixed <- state.ini[fixed_initials] optim_initials <- setdiff(names(state.ini), fixed_initials) state.ini.optim <- state.ini[optim_initials] - flag <- 0 state.ini.optim.boxnames <- names(state.ini.optim) + if (length(state.ini.optim) > 0) { names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep = "_") } - costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames, - state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol) - bestIteration <- -1; - costWithStatus <- function(P, ...){ - r <- costFunctions$cost(P) - if(r$cost == costFunctions$get.best.cost()) { - bestIteration<<-costFunctions$get.calls(); - cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n') - } - cat("MCMC Call no.", costFunctions$get.calls(), '\n') - return(r) - } + costFunctions <- CakeInternalCostFunctions(cake.model, state.ini.optim, state.ini.optim.boxnames, + state.ini.fixed, parms.fixed, observed, mkindiff, quiet, atol=atol) + bestIteration <- -1; + costWithStatus <- function(P, ...){ + r <- costFunctions$cost(P) + if(r$cost == costFunctions$get.best.cost()) { + bestIteration<<-costFunctions$get.calls(); + cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n') + } + cat("MCMC Call no.", costFunctions$get.calls(), '\n') + return(r) + } # Set the seed if ( is.null(seed) ) { # No seed so create a random one so there is something to report seed = runif(1,0,2^31-1) } + seed = as.integer(seed) set.seed(seed) - if(fitstart==TRUE) - { - ## ############# Get Initial Paramtervalues ############# + ## ############# Get Initial Paramtervalues ############# ## Start with no weighting - - if(useSolnp) + if(useExtraSolver) { - pnames=names(c(state.ini.optim, parms.optim)) - fn <- function(P){ - names(P) <- pnames - FF<<-cost(P) - return(FF$model)} - a <- try(fit <- solnp(c(state.ini.optim, parms.optim),fun=fn,LB=lower,UB=upper,control=control),silent=TRUE) - flag <- 1 - optimmethod <- 'solnp' + a <- try(fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='Port',control=control)) + if(class(a) == "try-error") { - print('solnp fails, try PORT or other algorithm by users choice, might take longer time. Do something else!') - warning('solnp fails, switch to PORT or other algorithm by users choice') - a <- try(fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='Port',control=control)) - flag <- 0 - - if(class(a) == "try-error") - { - fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method=method,control=control) - } - ## now using submethod already + fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='L-BFGS-B',control=control) } } else { - fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, + # modFit parameter transformations can explode if you put in parameters that are equal to a bound, so we move them away by a tiny amount. + all.optim <- ShiftAwayFromBoundaries(c(state.ini.optim, parms.optim), lower, upper) + + fit <- modFit(costFunctions$cost, all.optim, lower = lower, upper = upper,...) - } + } - if(commonsigma==TRUE){ - #observed$err <- rep(1,nrow(observed)) - if(flag==1 && useSolnp)## fit from solnp - { - ## run a small loop with 'Marq' or some other method - fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, method='Port',control=control) - } - cov0 <- summary(fit)$cov.scaled*2.4^2/length(fit$par) - costFunctions$set.calls(0); costFunctions$reset.best.cost() - res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) - #res <- modMCMC(cost,fit$par,lower=lower,upper=upper,niter=niter) - }else{ - ## ############## One reweighted estimation ############################ - ## Estimate the error variance(sd) - tmpres <- fit$residuals - oldERR <- observed$err - err <- rep(NA,length(mod_vars)) - for(i in 1:length(mod_vars)) - { - box <- mod_vars[i] - ind <- which(names(tmpres)==box) - tmp <- tmpres[ind] - err[i] <- sd(tmp) - } - names(err) <- mod_vars - ERR <- err[as.character(observed$name)] - observed$err <-ERR - costFunctions$set.error(ERR) - if(flag==1 && useSolnp)## fit from solnp - { - fit$ssr <- fit$values[length(fit$values)] - fit$residuals <-FF$residual$res - ## mean square per varaible - if (class(FF) == "modCost") { - names(fit$residuals) <- FF$residuals$name - fit$var_ms <- FF$var$SSR/FF$var$N - fit$var_ms_unscaled <- FF$var$SSR.unscaled/FF$var$N - fit$var_ms_unweighted <- FF$var$SSR.unweighted/FF$var$N - - names(fit$var_ms_unweighted) <- names(fit$var_ms_unscaled) <- - names(fit$var_ms) <- FF$var$name - } else fit$var_ms <- fit$var_ms_unweighted <- fit$var_ms_unscaled <- NA - } - olderr <- rep(1,length(mod_vars)) - diffsigma <- sum((err-olderr)^2) - ## At least do one iteration step to get a weighted LS - fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...) - ## Use this as the Input for MCMC algorithm - ## ########################## - fs <- summary(fit) - cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par) - var0 <- fit$var_ms_unweighted - costFunctions$set.calls(0); costFunctions$reset.best.cost() - res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) - + ## ############## One reweighted estimation ############################ + ## Estimate the error variance(sd) + tmpres <- fit$residuals + oldERR <- observed$err + err <- rep(NA,length(cake.model$map)) + + for(i in 1:length(cake.model$map)) { + box <- names(cake.model$map)[i] + ind <- which(names(tmpres)==box) + tmp <- tmpres[ind] + err[i] <- sd(tmp) } - }else { - stop('fitstart=FALSE code removed until needed') - } + + names(err) <- names(cake.model$map) + ERR <- err[as.character(observed$name)] + observed$err <-ERR + costFunctions$set.error(ERR) + + olderr <- rep(1,length(cake.model$map)) + diffsigma <- sum((err-olderr)^2) + ## At least do one iteration step to get a weighted LS + fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...) + ## Use this as the Input for MCMC algorithm + ## ########################## + fs <- summary(fit) + cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par) + var0 <- fit$var_ms_unweighted + costFunctions$set.calls(0); costFunctions$reset.best.cost() + res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=NULL,var0=var0,wvar0=0.1,niter=niter,outputlength=niter,burninlength=0,updatecov=niter,ntrydr=1,drscale=NULL,verbose=verbose) + # Construct the fit object to return fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) fit$start$type = c(rep("state", length(state.ini.optim)), @@ -189,92 +174,72 @@ CakeMcmcFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$ rep("deparm", length(parms.fixed))) fit$mkindiff <- mkindiff - fit$map <- mkinmod$map - fit$diffs <- mkinmod$diffs - - # mkin_long_to_wide does not handle ragged data - fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") - #names(fit$observed) <- c("time", as.vector(unique(observed$name))) - fns <- function(str) { - fields <- strsplit(str, "\\.")[[1]] - if (fields[1] == "value") { - return(fields[2]) - } - else { - return(str) - } - } - names(fit$observed) <- sapply(names(fit$observed), fns) + fit$map <- cake.model$map + fit$diffs <- cake.model$diffs - # Replace mean from modFit with mean from modMCMC - fnm <- function(x) mean(res$pars[,x]) - fit$par <- sapply(dimnames(res$pars)[[2]],fnm) - fit$bestpar <- res$bestpar - fit$costfn <- costWithStatus + # Replace mean from modFit with mean from modMCMC + fnm <- function(x) mean(res$pars[,x]) + fit$par <- sapply(dimnames(res$pars)[[2]],fnm) + fit$bestpar <- res$bestpar + fit$costfn <- costWithStatus - # Disappearence times - parms.all <- c(fit$par, parms.fixed) - obs_vars = unique(as.character(observed$name)) - fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), + # Disappearence times + parms.all <- c(fit$par, parms.fixed) + obs_vars = unique(as.character(observed$name)) + fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) - fit$extraDT50<- data.frame(DT50 = rep(NA, 2), row.names = c("k1", "k2")) - for (obs_var in obs_vars) { - type = names(mkinmod$map[[obs_var]])[1] - fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all,sannMaxIter) + fit$extraDT50<- data.frame(k1 = rep(NA, length(names(cake.model$map))), k2 = rep(NA, length(names(cake.model$map))), row.names = names(cake.model$map)) + + for (compartment.name in names(cake.model$map)) { + type = names(cake.model$map[[compartment.name]])[1] + fit$distimes[compartment.name, ] = CakeDT(type,compartment.name,parms.all,sannMaxIter) + fit$extraDT50[compartment.name, ] = CakeExtraDT(type, compartment.name, parms.all) } - - fit$extraDT50[ ,c("DT50")] = CakeExtraDT(type,parms.all) - # Fits to data - odeini <- state.ini - odeparms <- parms.all - # If this step modelled the start amount, include it. Lookup in vectors returns NA not null if missing - if(!is.na(odeparms["Parent_0"])) { odeini['Parent'] = odeparms["Parent_0"] } + fit$ioreRepDT = CakeIORERepresentativeDT("Parent", parms.all) + fit$fomcRepDT = CakeFOMCBackCalculatedDT(parms.all) + + # Fits to data + odeini <- state.ini + odeparms <- parms.all + # If this step modelled the start amount, include it. Lookup in vectors returns NA not null if missing + if(!is.na(odeparms["Parent_0"])) { odeini['Parent'] = odeparms["Parent_0"] } - # Solve the system - evalparse <- function(string) - { - eval(parse(text=string), as.list(c(odeparms, odeini))) - } + # Solve the system + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } - # Ensure initial state is at time 0 - obstimes <- unique(c(0,observed$time)) + # Ensure initial state is at time 0 + obstimes <- unique(c(0,observed$time)) - out <- ode( - y = odeini, - times = obstimes, - func = mkindiff, - parms = odeparms, - ) - - # Output transformation for models with unobserved compartments like SFORB - out_transformed <- data.frame(time = out[,"time"]) - for (var in names(mkinmod$map)) { - if(length(mkinmod$map[[var]]) == 1) { - out_transformed[var] <- out[, var] - } else { - out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) - } - } - fit$predicted <- out_transformed - fit$penalties <- CakePenaltiesLong(odeparms, out_transformed, observed) + odeini <- AdjustOdeInitialValues(odeini, cake.model, odeparms) + + out <- ode(y = odeini, times = obstimes, func = mkindiff, parms = odeparms, atol = atol) + + out_transformed <- PostProcessOdeOutput(out, cake.model, atol) + + fit$predicted <- out_transformed + fit$penalties <- CakePenaltiesLong(odeparms, out_transformed, observed) - predicted_long <- mkin_wide_to_long(out_transformed,time="time") - obs_vars = unique(as.character(observed$name)) - fit$errmin <- CakeChi2(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini) + predicted_long <- wide_to_long(out_transformed,time="time") + obs_vars = unique(as.character(observed$name)) + fit$errmin <- CakeChi2(cake.model, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fit$fixed) - data<-observed - data$err<-rep(NA,length(data$time)) - data<-merge(data, predicted_long, by=c("time","name")) - names(data)<-c("time", "variable", "observed","err-var", "predicted") - data$residual<-data$observed-data$predicted - data$variable <- ordered(data$variable, levels = obs_vars) - fit$data <- data[order(data$variable, data$time), ] + data<-observed + data$err<-rep(NA,length(data$time)) + data<-merge(data, predicted_long, by=c("time","name")) + names(data)<-c("time", "variable", "observed","err-var", "predicted") + data$residual<-data$observed-data$predicted + data$variable <- ordered(data$variable, levels = obs_vars) + fit$data <- data[order(data$variable, data$time), ] + fit$atol <- atol - sq <- data$residual * data$residual - fit$ssr <- sum(sq) + sq <- data$residual * data$residual + fit$ssr <- sum(sq) - fit$seed = seed + fit$seed = seed fit$res <- res class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit") @@ -355,10 +320,14 @@ summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, halflives ans$start <- object$start ans$fixed <- object$fixed ans$errmin <- object$errmin - if(distimes) ans$distimes <- object$distimes + if(distimes){ + ans$distimes <- object$distimes + ans$extraDT50 <- object$extraDT50 + ans$ioreRepDT <- object$ioreRepDT + ans$fomcRepDT <- object$fomcRepDT + } if(halflives) ans$halflives <- object$halflives if(ff) ans$ff <- object$ff - if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit") return(ans) } diff --git a/CakeModel.R b/CakeModel.R index ce6bc01..3a32779 100644 --- a/CakeModel.R +++ b/CakeModel.R @@ -1,15 +1,13 @@ # Was Id: mkinmod.R 71 2010-09-12 01:13:36Z jranke -# $Id$ -# The CAKE R modules are based on mkin +# Some of the CAKE R modules are based on mkin # Portions Johannes Ranke, 2010 # Contact: mkin-devel@lists.berlios.de -# This version has been modified to parameterise SFO as k and flow fractions -# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta -# Tessella Project Reference: 6245 +# Modifications developed by Tessella for Syngenta: Copyright (C) 2011-2016 Syngenta +# Tessella Project Reference: 6245, 7247, 8361, 7414 -# This program is free software: you can redistribute it and/or modify +# The CAKE R modules are free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. @@ -20,98 +18,80 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program. If not, see .” +# along with this program. If not, see . CakeModel <- function(..., use_of_ff = "max") { spec <- list(...) obs_vars <- names(spec) - if (!use_of_ff %in% c("min", "max")) - stop("The use of formation fractions 'use_of_ff' can only be 'min' or 'max'") + if (!use_of_ff %in% c("min", "max")) stop("The use of formation fractions 'use_of_ff' can only be 'min' or 'max'") + # differential equations, parameter names and a mapping from model variables # to observed variables. If possible, a matrix representation of the # differential equations is included parms <- vector() diffs <- vector() map <- list() - if(spec[[1]]$type %in% c("FOMC", "DFOP", "HS")) { - mat = FALSE - if(!is.null(spec[[1]]$to)) { - message <- paste("Only constant formation fractions over time are implemented.", - "Depending on the reason for the time dependence of degradation, this may be unrealistic", - sep="\n") - warning(message) - } else message <- "ok" - } else mat = TRUE - - mat = FALSE # XYZZY Not implemented yet assuming it should be nlt = list() # Establish list of differential equations for (varname in obs_vars) { - if(is.null(spec[[varname]]$type)) stop( - "Every argument to mkinmod must be a list containing a type component") - if(!spec[[varname]]$type %in% c("SFO", "FOMC", "DFOP", "HS", "SFORB")) stop( - "Available types are SFO, FOMC, DFOP, HS and SFORB only") + if(is.null(spec[[varname]]$type)) stop("Every argument to CakeModel must be a list containing a type component") + if(!spec[[varname]]$type %in% c("SFO", "FOMC", "DFOP", "HS", "IORE")) stop("Available types are SFO, FOMC, DFOP, HS and IORE only") + new_parms <- vector() # New (sub)compartments (boxes) needed for the model type - new_boxes <- switch(spec[[varname]]$type, - SFO = varname, - FOMC = varname, - DFOP = varname, - HS = varname, - SFORB = paste(varname, c("free", "bound"), sep="_") - ) + if (spec[[varname]]$type == "DFOP" && varname != "Parent"){ + # If this is a DFOP metabolite, we need to form a system of two differential equations + # (see FOCUS page 137) + new_boxes <- paste(varname, c("1", "2"), sep="_") + } else{ + new_boxes <- varname + } + map[[varname]] <- new_boxes names(map[[varname]]) <- rep(spec[[varname]]$type, length(new_boxes)) # Start a new differential equation for each new box new_diffs <- paste("d_", new_boxes, " =", sep="") - # Get the name of the box(es) we are working on for the decline term(s) - box_1 = map[[varname]][[1]] # This is the only box unless type is SFORB - # Turn on sink if not specified otherwise if(is.null(spec[[varname]]$sink)) spec[[varname]]$sink <- TRUE - #@@@ADD SFO k HERE !!!!!!!!!!!!! # Construct and add SFO term and add SFO parameters if needed if(spec[[varname]]$type == "SFO") { if (use_of_ff == "min") { # Minimum use of formation fractions if(spec[[varname]]$sink) { - # From p. 53 of the FOCUS kinetics report - k_term <- paste("k", new_boxes[[1]], "sink", sep="_") - nonlinear_term <- paste(k_term, new_boxes[[1]], sep=" * ") - spec[[varname]]$nlt<-nonlinear_term - new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) - new_parms <- k_term - ff <- vector() - decline_term <- paste(nonlinear_term, "*", new_boxes[[1]]) - } else { # otherwise no decline term needed here - decline_term = "0" - } - } else { - k_term <- paste("k", new_boxes[[1]], sep="_") - nonlinear_term <- paste(k_term, new_boxes[[1]], sep=" * ") - spec[[varname]]$nlt<-nonlinear_term - new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) - new_parms <- k_term - ff <- vector() - decline_term <- paste(nonlinear_term, "*", new_boxes[[1]]) - } - } + # From p. 53 of the FOCUS kinetics report + k_term <- paste("k", new_boxes[[1]], "sink", sep="_") + nonlinear_term <- paste(k_term, new_boxes[[1]], sep=" * ") + spec[[varname]]$nlt<-nonlinear_term + new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) + new_parms <- k_term + ff <- vector() + decline_term <- paste(nonlinear_term, "*", new_boxes[[1]]) + } else { # otherwise no decline term needed here + decline_term = "0" + } + } else { + k_term <- paste("k", new_boxes[[1]], sep="_") + nonlinear_term <- paste(k_term, new_boxes[[1]], sep=" * ") + spec[[varname]]$nlt<-nonlinear_term + new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) + new_parms <- k_term + ff <- vector() + decline_term <- paste(nonlinear_term, "*", new_boxes[[1]]) + } + } # Construct and add FOMC term and add FOMC parameters if needed if(spec[[varname]]$type == "FOMC") { if(match(varname, obs_vars) != 1) { stop("Type FOMC is only possible for the first compartment, which is assumed to be the source compartment") } -# if(spec[[varname]]$sink == FALSE) { -# stop("Turning off the sink for the FOMC model is not implemented") -# } # From p. 53 of the FOCUS kinetics report nonlinear_term <- paste("(alpha/beta) * ((time/beta) + 1)^-1 *", new_boxes[[1]]) spec[[varname]]$nlt<-nonlinear_term @@ -122,18 +102,31 @@ CakeModel <- function(..., use_of_ff = "max") # Construct and add DFOP term and add DFOP parameters if needed if(spec[[varname]]$type == "DFOP") { - if(match(varname, obs_vars) != 1) { - stop("Type DFOP is only possible for the first compartment, which is assumed to be the source compartment") - } -# if(spec[[varname]]$sink == FALSE) { -# stop("Turning off the sink for the DFOP model is not implemented") -# } - # From p. 57 of the FOCUS kinetics report - nonlinear_term <- paste("((k1 * g * exp(-k1 * time) + k2 * (1 - g) * exp(-k2 * time)) / (g * exp(-k1 * time) + (1 - g) * exp(-k2 * time))) *", new_boxes[[1]]) - spec[[varname]]$nlt<-nonlinear_term - new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) - new_parms <- c("k1", "k2", "g") + k1_term = paste("k1", varname, sep="_") + k2_term = paste("k2", varname, sep="_") + g_term = paste("g", varname, sep="_") + + new_parms <- c(k1_term, k2_term, g_term) ff <- vector() + + if (varname == "Parent"){ + # From p. 57 of the FOCUS kinetics report + # Looks like this: paste("((k1 * g * exp(-k1 * time) + k2 * (1 - g) * exp(-k2 * time)) / (g * exp(-k1 * time) + (1 - g) * exp(-k2 * time))) *", new_boxes[[1]]) + nonlinear_term <- paste("((", k1_term, " * ", g_term, " * exp(-", k1_term, " * time) + ", k2_term, " * (1 - ", g_term, ") * exp(-", k2_term, " * time)) / (", g_term, " * exp(-", k1_term, " * time) + (1 - ", g_term, ") * exp(-", k2_term, " * time))) *", new_boxes[[1]]) + + spec[[varname]]$nlt<-nonlinear_term + new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) + } else { + # From p. 137 of the FOCUS kinetics report - two sub-compartments with g applied to initial conditions (in ODE pre-processing) and in formation terms. + first_nonlinear_term <- paste(k1_term, new_boxes[[1]], sep=" * ") + second_nonlinear_term <- paste(k2_term, new_boxes[[2]], sep=" * ") + + overall_term_for_formation <- paste("(", g_term, " * ", first_nonlinear_term, " + (1 - ", g_term, ") *", second_nonlinear_term, ")") + spec[[varname]]$nlt <- overall_term_for_formation + + new_diffs[[1]] <- paste(new_diffs[[1]], "-", first_nonlinear_term) + new_diffs[[2]] <- paste(new_diffs[[2]], "-", second_nonlinear_term) + } } # Construct and add HS term and add HS parameters if needed @@ -141,82 +134,52 @@ CakeModel <- function(..., use_of_ff = "max") if(match(varname, obs_vars) != 1) { stop("Type HS is only possible for the first compartment, which is assumed to be the source compartment") } -# if(spec[[varname]]$sink == FALSE) { -# stop("Turning off the sink for the HS model is not implemented") -# } - # From p. 55 of the FOCUS kinetics report -# nonlinear_term <- paste("ifelse(time <= tb, k1, k2)", "*", new_boxes[[1]]) - nonlinear_term <- paste("((k1 - k2) / ( 1 + exp( 10*(time - tb) ) ) + k2)", "*", new_boxes[[1]]) + # From p. 55 of the FOCUS kinetics report: nonlinear_term <- paste("ifelse(time <= tb, k1, k2)", "*", new_boxes[[1]]) + # Replaced this with the smoothed version on the next line to ease convergence. + nonlinear_term <- paste("((k1 - k2) / ( 1 + exp( 300*(time - tb)/(tb + 0.00001) ) ) + k2)", "*", new_boxes[[1]]) spec[[varname]]$nlt<-nonlinear_term new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) new_parms <- c("k1", "k2", "tb") ff <- vector() } - # Construct terms for transfer to sink and add if appropriate - - #@@@@ REMOVE THIS ????? - if(spec[[varname]]$sink) { - # Add first-order sink term to first (or only) box for SFO and SFORB -# if(spec[[varname]]$type %in% c("SFO", "SFORB")) { - if(spec[[varname]]$type == "SFORB") { - k_compound_sink <- paste("k", new_boxes[[1]], "sink", sep="_") - sink_term <- paste("-", k_compound_sink, "*", new_boxes[[1]]) - new_diffs[[1]] <- paste(new_diffs[[1]], sink_term) - new_parms <- k_compound_sink - } + # Construct and add IORE term and add IORE parameters if needed + if(spec[[varname]]$type == "IORE") { + k_term <- paste("k", new_boxes[[1]], sep="_") + nonlinear_term <- paste(k_term, new_boxes[[1]], sep=" * ") + nonlinear_term <- paste(nonlinear_term, "^N") + spec[[varname]]$nlt<-nonlinear_term + new_diffs[[1]] <- paste(new_diffs[[1]], "-", nonlinear_term) + new_parms <- c(k_term, "N") + ff <- vector() } - - # Add reversible binding if appropriate - if(spec[[varname]]$type == "SFORB") { - k_free_bound <- paste("k", varname, "free", "bound", sep="_") - k_bound_free <- paste("k", varname, "bound", "free", sep="_") - reversible_binding_terms <- c( - paste("-", k_free_bound, "*", new_boxes[[1]], "+", k_bound_free, "*", new_boxes[[2]]), - paste("+", k_free_bound, "*", new_boxes[[1]], "-", k_bound_free, "*", new_boxes[[2]])) - new_diffs <- paste(new_diffs, reversible_binding_terms) - new_parms <- c(new_parms, k_free_bound, k_bound_free) - } # Add observed variable to model parms <- c(parms, new_parms) names(new_diffs) <- new_boxes diffs <- c(diffs, new_diffs) } + # Transfer between compartments for (varname in obs_vars) { to <- spec[[varname]]$to + if(!is.null(to)) { - origin_box <- switch(spec[[varname]]$type, - SFO = varname, - FOMC = varname, - DFOP = varname, - HS = varname, - SFORB = paste(varname, "free", sep="_")) fraction_left <- NULL + for (target in to) { - target_box <- switch(spec[[target]]$type, + target_boxes <- switch(spec[[target]]$type, SFO = target, - SFORB = paste(target, "free", sep="_")) - # SFO is no longer special - #if(spec[[varname]]$type %in% c("SFO", "SFORB")) { - if(spec[[varname]]$type == "SFORB") { - k_from_to <- paste("k", origin_box, target_box, sep="_") - diffs[[origin_box]] <- paste(diffs[[origin_box]], "-", - k_from_to, "*", origin_box) - diffs[[target_box]] <- paste(diffs[[target_box]], "+", - k_from_to, "*", origin_box) - parms <- c(parms, k_from_to) - } - # Handle SFO like the others -# if(spec[[varname]]$type %in% c("FOMC", "DFOP", "HS")) { - if(spec[[varname]]$type %in% c("FOMC", "DFOP", "HS", "SFO")) { + DFOP = paste(target, c("1", "2"), sep="_")) + + if(spec[[varname]]$type %in% c("FOMC", "DFOP", "HS", "SFO", "IORE")) { if ( length(to)==1 && !spec[[varname]]$sink ) { # There is only one output, so no need for any flow fractions. Just add the whole flow from the parent - diffs[[target_box]] <- paste(diffs[[target_box]], "+", spec[[varname]]$nlt) + formation_term <- spec[[varname]]$nlt } else { fraction_to_target = paste("f", varname, "to", target, sep="_") fraction_not_to_target = paste("(1 - ", fraction_to_target, ")", sep="") + if(is.null(fraction_left)) { fraction_really_to_target = fraction_to_target fraction_left = fraction_not_to_target @@ -225,86 +188,38 @@ CakeModel <- function(..., use_of_ff = "max") if ( target==tail(to,1) && !spec[[varname]]$sink ) { fraction_really_to_target = fraction_left } else { -# (1-fa)fb version -# fraction_really_to_target = paste(fraction_left, " * ", fraction_to_target, sep="") -# fraction_left = paste(fraction_left, " * ", fraction_not_to_target, sep="") -# fb version fraction_really_to_target = fraction_to_target fraction_left = paste(fraction_left, " - ", fraction_to_target, sep="") } } - ff[target_box] = fraction_really_to_target - diffs[[target_box]] <- paste(diffs[[target_box]], "+", ff[target_box], "*", spec[[varname]]$nlt) + + ff[target] = fraction_really_to_target + formation_term <- paste(ff[target], "*", spec[[varname]]$nlt) + # Add the flow fraction parameter (if it exists) if ( target!=tail(to,1) || spec[[varname]]$sink ) { parms <- c(parms, fraction_to_target) } } + + if (spec[[target]]$type == "DFOP") { + g_term <- paste("g", target, sep="_") + + first_formation_term <- paste(g_term, "*", formation_term) + second_formation_term <- paste("(1 -", g_term, ") *", formation_term) + + diffs[[target_boxes[[1]]]] <- paste(diffs[[target_boxes[[1]]]], "+", first_formation_term) + diffs[[target_boxes[[2]]]] <- paste(diffs[[target_boxes[[2]]]], "+", second_formation_term) + } else{ + diffs[[target_boxes[[1]]]] <- paste(diffs[[target_boxes[[1]]]], "+", formation_term) + } } } } } + model <- list(diffs = diffs, parms = parms, map = map, use_of_ff = use_of_ff) - # Create coefficient matrix if appropriate#{{{ - if (mat) { - boxes <- names(diffs) - n <- length(boxes) - m <- matrix(nrow=n, ncol=n, dimnames=list(boxes, boxes)) - - if (use_of_ff == "min") { # Minimum use of formation fractions - for (from in boxes) { - for (to in boxes) { - if (from == to) { # diagonal elements - k.candidate = paste("k", from, c(boxes, "sink"), sep="_") - k.candidate = sub("free.*bound", "free_bound", k.candidate) - k.candidate = sub("bound.*free", "bound_free", k.candidate) - k.effective = intersect(model$parms, k.candidate) - m[from,to] = ifelse(length(k.effective) > 0, - paste("-", k.effective, collapse = " "), "0") - - } else { # off-diagonal elements - k.candidate = paste("k", from, to, sep="_") - if (sub("_free$", "", from) == sub("_bound$", "", to)) { - k.candidate = paste("k", sub("_free$", "_free_bound", from), sep="_") - } - if (sub("_bound$", "", from) == sub("_free$", "", to)) { - k.candidate = paste("k", sub("_bound$", "_bound_free", from), sep="_") - } - k.effective = intersect(model$parms, k.candidate) - m[to, from] = ifelse(length(k.effective) > 0, - k.effective, "0") - } - } - } - } else { # Use formation fractions where possible - for (from in boxes) { - for (to in boxes) { - if (from == to) { # diagonal elements - k.candidate = paste("k", from, sep="_") - m[from,to] = ifelse(k.candidate %in% model$parms, - paste("-", k.candidate), "0") - if(grepl("_free", from)) { # add transfer to bound compartment for SFORB - m[from,to] = paste(m[from,to], "-", paste("k", from, "bound", sep="_")) - } - if(grepl("_bound", from)) { # add backtransfer to free compartment for SFORB - m[from,to] = paste("- k", from, "free", sep="_") - } - m[from,to] = m[from,to] - } else { # off-diagonal elements - f.candidate = paste("f", from, "to", to, sep="_") - k.candidate = paste("k", from, to, sep="_") - # SFORB with maximum use of formation fractions not implemented, see above - m[to, from] = ifelse(f.candidate %in% model$parms, - paste(f.candidate, " * k_", from, sep=""), - ifelse(k.candidate %in% model$parms, k.candidate, "0")) - } - } - } - } - model$coefmat <- m - } - if (exists("ff")) model$ff = ff class(model) <- "mkinmod" invisible(model) diff --git a/CakeNullPlot.R b/CakeNullPlot.R index f5484b4..e0823f2 100644 --- a/CakeNullPlot.R +++ b/CakeNullPlot.R @@ -1,12 +1,11 @@ #$Id$ # Generates model curves so the GUI can plot them. Used when all params are fixed so there was no fit -# The CAKE R modules are based on mkin +# Some of the CAKE R modules are based on mkin # Based on code in IRLSkinfit -# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta -# Author: Rob Nelson (Tessella) -# Tessella Project Reference: 6245 +# Modifications developed by Tessella for Syngenta: Copyright (C) 2011-2016 Syngenta +# Tessella Project Reference: 6245, 7247, 8361, 7414 -# This program is free software: you can redistribute it and/or modify +# The CAKE R modules are free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. @@ -17,38 +16,57 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program. If not, see .” +# along with this program. If not, see . -CakeNullPlot <- function(model, parms.ini, state.ini, observed, xlim = range(observed$time), digits = max(3, getOption("digits") - 3), sannMaxIter = 10000, ...) +CakeNullPlot <- function(model, parms.ini, state.ini, observed, xlim = range(observed$time), + digits = max(3, getOption("digits") - 3), sannMaxIter = 10000, atol = 1e-6, ...) { # Print the fixed values fixed <- data.frame(value = c(state.ini, parms.ini)) fixed$type = c(rep("state", length(state.ini)), rep("deparm", length(parms.ini))) cat("\nFixed parameter values:\n") print(fixed) + parms.all = c(state.ini, parms.ini) - # Print disappearence times + # Print disappearance times obs_vars = unique(as.character(observed$name)) distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) - extraDT50<- data.frame(DT50 = rep(NA, 2), row.names = c("k1", "k2")) + extraDT50<- data.frame(k1 = rep(NA, length(obs_vars)), k2 = rep(NA, length(obs_vars)), row.names = obs_vars) for (obs_var in obs_vars) { - type = names(model$map[[obs_var]])[1] - if(exists("DT50")) distimes[obs_var, ] = c(DT50, DT90) - distimes[obs_var, ] = CakeDT(type,obs_var,parms.ini,sannMaxIter) - } - - cat("\nEstimated disappearance times:\n") - print(distimes, digits=digits,...) + type = names(model$map[[obs_var]])[1] + if(exists("DT50")) distimes[obs_var, ] = c(DT50, DT90) + distimes[obs_var, ] = CakeDT(type,obs_var,parms.all,sannMaxIter) + extraDT50[obs_var, ] = CakeExtraDT(type, obs_var, parms.all) + } + + cat("\nEstimated disappearance times:\n") + print(distimes, digits=digits,...) - extraDT50[ ,c("DT50")] = CakeExtraDT(type,parms.ini) - cat("\nHalf Lives for Extra k values:\n") - print(extraDT50, digits=digits,...) - - # Plot the model + cat("\nHalf Lives for Extra k values:\n") + print(extraDT50, digits=digits,...) + + ioreRepDT = CakeIORERepresentativeDT("Parent", parms.all) + printIoreRepDT <- !is.null(ioreRepDT) + if (printIoreRepDT){ + cat("\nRepresentative Half Life for IORE:", format(signif(ioreRepDT, digits))) + } + + cat("\n") + + fomcRepDT = CakeFOMCBackCalculatedDT(parms.all) + printFomcRepDT <- !is.null(fomcRepDT) + + if (printIoreRepDT){ + cat("\nBack-calculated Half Life for FOMC:", format(signif(fomcRepDT, digits))) + } + + cat("\n") + + # Plot the model - outtimes <- seq(0, xlim[2], length.out=101) + outtimes <- seq(0, xlim[2], length.out=CAKE.plots.resolution) odeini <- state.ini odenames <- model$parms @@ -60,75 +78,58 @@ CakeNullPlot <- function(model, parms.ini, state.ini, observed, xlim = range(obs eval(parse(text=string), as.list(c(odeparms, odeini))) } - mod_vars <- names(model$diffs) - mkindiff <- function(t, state, parms) { - time <- t - diffs <- vector() - for (box in mod_vars) { - diffname <- paste("d", box, sep = "_") - diffs[diffname] <- with(as.list(c(time, state, parms)), - eval(parse(text = model$diffs[[box]]))) - } - return(list(c(diffs))) + mod_vars <- names(model$diffs) + mkindiff <- function(t, state, parms) { + time <- t + diffs <- vector() + for (box in mod_vars) { + diffname <- paste("d", box, sep = "_") + diffs[diffname] <- with(as.list(c(time, state, parms)), + eval(parse(text = model$diffs[[box]]))) } + + return(list(c(diffs))) + } + + # Ensure initial state is at time 0 + obstimes <- unique(c(0,observed$time)) + + odeini <- AdjustOdeInitialValues(odeini, model, odeparms) - # Ensure initial state is at time 0 - obstimes <- unique(c(0,observed$time)) - out <- ode( - y = odeini, - times = obstimes, - func = mkindiff, - parms = odeparms, - ) + # Solve at observation times + out <- ode(y = odeini, times = obstimes, func = mkindiff, parms = odeparms, atol = atol) - # Output transformation for models with unobserved compartments like SFORB - out_transformed <- data.frame(time = out[,"time"]) - for (var in names(model$map)) { - if(length(model$map[[var]]) == 1) { - out_transformed[var] <- out[, var] - } else { - out_transformed[var] <- rowSums(out[, model$map[[var]]]) - } - } + out_transformed <- PostProcessOdeOutput(out, model, atol) + + predicted_long <- wide_to_long(out_transformed,time="time") - predicted_long <- mkin_wide_to_long(out_transformed,time="time") # Calculate chi2 error levels according to FOCUS (2006) - errmin<-CakeChi2(model, observed, predicted_long, obs_vars, c("auto"), c(), state.ini, c("auto")) - cat("\nChi2 error levels in percent:\n") - errmin$err.min <- 100 * errmin$err.min - print(errmin, digits=digits,...) + errmin<-CakeChi2(model, observed, predicted_long, obs_vars, c(), c(), state.ini, parms.ini, fixed) + cat("\nChi2 error levels in percent:\n") + errmin$err.min <- 100 * errmin$err.min + print(errmin, digits=digits,...) + # Fit to data + data<-observed + data$err<-rep(NA,length(data$time)) + data<-merge(data, predicted_long, by=c("time","name")) + names(data)<-c("time", "variable", "observed","err-var", "predicted") + data$residual<-data$observed-data$predicted + data<-data[order(data$variable,data$time),] + cat("\nData:\n") + print(format(data, digits = digits, scientific = FALSE,...), row.names = FALSE) - # Fit to data - data<-observed - data$err<-rep(NA,length(data$time)) - data<-merge(data, predicted_long, by=c("time","name")) - names(data)<-c("time", "variable", "observed","err-var", "predicted") - data$residual<-data$observed-data$predicted - data<-data[order(data$variable,data$time),] - cat("\nData:\n") - print(format(data, digits = digits, scientific = FALSE,...), row.names = FALSE) - - - out <- ode( - y = odeini, - times = outtimes, - func = mkindiff, - parms = odeparms, - ) + # Solve at output times + out <- ode( + y = odeini, + times = outtimes, + func = mkindiff, + parms = odeparms) - # Output transformation for models with unobserved compartments like SFORB - out_transformed <- data.frame(time = out[,"time"]) - for (var in names(model$map)) { - if(length(model$map[[var]]) == 1) { - out_transformed[var] <- out[, var] - } else { - out_transformed[var] <- rowSums(out[, model$map[[var]]]) - } - } + out_transformed <- PostProcessOdeOutput(out, model, atol) - cat("\n\n") - print(out_transformed) - cat("\n") + cat("\n\n") + print(out_transformed) + cat("\n") } diff --git a/CakeOlsFit.R b/CakeOlsFit.R index 626a595..2b8e736 100644 --- a/CakeOlsFit.R +++ b/CakeOlsFit.R @@ -7,13 +7,9 @@ # from the FME package, v 1.1 by Soetart and Petzoldt, which was in turn # inspired by summary.nls.lm -#$Id$ -# This version has been modified to expect SFO parameterised as k and flow fractions -# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta -# Authors: Rob Nelson, Richard Smith, Tamar Christina -# Tessella Project Reference: 6245, 7247 +# Modifications developed by Tessella for Syngenta: Copyright (C) 2011-2016 Syngenta -# This program is free software: you can redistribute it and/or modify +# The CAKE R modules are free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. @@ -24,57 +20,69 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program. If not, see .” - -CakeOlsFit <- function(mkinmod, observed, - parms.ini = rep(0.1, length(mkinmod$parms)), - state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), - lower = 0, upper = Inf, - fixed_parms = NULL, - fixed_initials = names(mkinmod$diffs)[-1], - eigen = TRUE, - plot = FALSE, quiet = FALSE, - err = NULL, weight = "none", scaleVar = FALSE, - atol = 1e-6, - sannMaxIter = 10000, - useSolnp = FALSE, method='L-BFGS-B', - ...) +# along with this program. If not, see . + + +# Performs an ordinary least squares fit on a given CAKE model. +# +# cake.model: The model to perform the fit on (as generated by CakeModel.R). +# observed: Observation data to fit to. +# parms.ini: Initial values for the parameters being fitted. +# state.ini: Initial state (i.e. initial values for concentration, the dependent variable being modelled). +# lower: Lower bounds to apply to parameters. +# upper: Upper bound to apply to parameters. +# fixed_parms: A vector of names of parameters that are fixed to their initial values. +# fixed_initials: A vector of compartments with fixed initial concentrations. +# quiet: Whether the internal cost functions should execute more quietly than normal (less output). +# atol: The tolerance to apply to the ODE solver. +# sannMaxIter: The maximum number of iterations to apply to SANN processes. +CakeOlsFit <- function(cake.model, + observed, + parms.ini = rep(0.1, length(cake.model$parms)), + state.ini = c(100, rep(0, length(cake.model$diffs) - 1)), + lower = 0, + upper = Inf, + fixed_parms = NULL, + fixed_initials = names(cake.model$diffs)[-1], + quiet = FALSE, + atol = 1e-6, + sannMaxIter = 10000, + ...) { - mod_vars <- names(mkinmod$diffs) + mod_vars <- names(cake.model$diffs) # Subset dataframe with mapped (modelled) variables - observed <- subset(observed, name %in% names(mkinmod$map)) + observed <- subset(observed, name %in% names(cake.model$map)) # Get names of observed variables obs_vars = unique(as.character(observed$name)) - + # Name the parameters if they are not named yet - if(is.null(names(parms.ini))) names(parms.ini) <- mkinmod$parms - + if(is.null(names(parms.ini))) names(parms.ini) <- cake.model$parms + # Name the inital parameter values if they are not named yet if(is.null(names(state.ini))) names(state.ini) <- mod_vars - + # Parameters to be optimised parms.fixed <- parms.ini[fixed_parms] optim_parms <- setdiff(names(parms.ini), fixed_parms) parms.optim <- parms.ini[optim_parms] - + state.ini.fixed <- state.ini[fixed_initials] optim_initials <- setdiff(names(state.ini), fixed_initials) state.ini.optim <- state.ini[optim_initials] state.ini.optim.boxnames <- names(state.ini.optim) if(length(state.ini.optim) > 0) { - names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") + names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") } - + # Decide if the solution of the model can be based on a simple analytical # formula, the spectral decomposition of the matrix (fundamental system) # or a numeric ode solver from the deSolve package - if (length(mkinmod$map) == 1) { + if (length(cake.model$map) == 1) { solution = "analytical" } else { - if (is.matrix(mkinmod$coefmat) & eigen) solution = "eigen" - else solution = "deSolve" + solution = "deSolve" } - + # Create a function calculating the differentials specified by the model # if necessary if(solution == "deSolve") { @@ -85,234 +93,63 @@ CakeOlsFit <- function(mkinmod, observed, { diffname <- paste("d", box, sep="_") diffs[diffname] <- with(as.list(c(time,state, parms)), - eval(parse(text=mkinmod$diffs[[box]]))) + eval(parse(text=cake.model$diffs[[box]]))) } return(list(c(diffs))) } } - - cost.old <- 1e100 - calls <- 0 - out_predicted <- NA - # Define the model cost function - cost <- function(P) - { - assign("calls", calls+1, inherits=TRUE) - if(length(state.ini.optim) > 0) { - odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) - names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) - } else odeini <- state.ini.fixed - - odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed) - - # Ensure initial state is at time 0 - outtimes = unique(c(0,observed$time)) - evalparse <- function(string) - { - eval(parse(text=string), as.list(c(odeparms, odeini))) - } - - # Solve the system - if (solution == "analytical") { - parent.type = names(mkinmod$map[[1]])[1] - parent.name = names(mkinmod$diffs)[[1]] - o <- switch(parent.type, - SFO = SFO.solution(outtimes, - evalparse(parent.name), - evalparse(paste("k", parent.name, sep="_"))), -# evalparse("k")), -# evalparse(paste("k", parent.name, "sink", sep="_"))), - FOMC = FOMC.solution(outtimes, - evalparse(parent.name), - evalparse("alpha"), evalparse("beta")), - DFOP = DFOP.solution(outtimes, - evalparse(parent.name), - evalparse("k1"), evalparse("k2"), - evalparse("g")), - HS = HS.solution(outtimes, - evalparse(parent.name), - evalparse("k1"), evalparse("k2"), - evalparse("tb")), - SFORB = SFORB.solution(outtimes, - evalparse(parent.name), - evalparse(paste("k", parent.name, "bound", sep="_")), - evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), - evalparse(paste("k", parent.name, "sink", sep="_"))) - ) - out <- cbind(outtimes, o) - dimnames(out) <- list(outtimes, c("time", sub("_free", "", parent.name))) - } - if (solution == "eigen") { - coefmat.num <- matrix(sapply(as.vector(mkinmod$coefmat), evalparse), - nrow = length(mod_vars)) - e <- eigen(coefmat.num) - c <- solve(e$vectors, odeini) - f.out <- function(t) { - e$vectors %*% diag(exp(e$values * t), nrow=length(mod_vars)) %*% c - } - o <- matrix(mapply(f.out, outtimes), - nrow = length(mod_vars), ncol = length(outtimes)) - dimnames(o) <- list(mod_vars, outtimes) - out <- cbind(time = outtimes, t(o)) - } - if (solution == "deSolve") - { - out <- ode( - y = odeini, - times = outtimes, - func = mkindiff, - parms = odeparms, - atol = atol - ) - } - # Output transformation for models with unobserved compartments like SFORB - out_transformed <- data.frame(time = out[,"time"]) - for (var in names(mkinmod$map)) { - if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { - out_transformed[var] <- out[, var] - } else { - out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) - } - } - assign("out_predicted", out_transformed, inherits=TRUE) - - mC <- CakeCost(out_transformed, observed, y = "value", - err = err, weight = weight, scaleVar = scaleVar) - mC$penalties <- CakePenalties(odeparms, out_transformed, observed) - mC$model <- mC$cost + mC$penalties; - if (mC$model < cost.old) { - if (!quiet) - cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model, - "\n") - - # Plot the data and current model output if requested - if(plot) { - outtimes_plot = seq(min(observed$time), max(observed$time), length.out=100) - if (solution == "analytical") { - o_plot <- switch(parent.type, - SFO = SFO.solution(outtimes_plot, - evalparse(parent.name), - evalparse(paste("k", parent.name, sep="_"))), -# evalparse(paste("k", parent.name, "sink", sep="_"))), - FOMC = FOMC.solution(outtimes_plot, - evalparse(parent.name), - evalparse("alpha"), evalparse("beta")), - DFOP = DFOP.solution(outtimes_plot, - evalparse(parent.name), - evalparse("k1"), evalparse("k2"), - evalparse("g")), - HS = HS.solution(outtimes_plot, - evalparse(parent.name), - evalparse("k1"), evalparse("k2"), - evalparse("tb")), - SFORB = SFORB.solution(outtimes_plot, - evalparse(parent.name), - evalparse(paste("k", parent.name, "bound", sep="_")), - evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")), - evalparse(paste("k", parent.name, "sink", sep="_"))) - ) - out_plot <- cbind(outtimes_plot, o_plot) - dimnames(out_plot) <- list(outtimes_plot, c("time", sub("_free", "", parent.name))) - } - if(solution == "eigen") { - o_plot <- matrix(mapply(f.out, outtimes_plot), - nrow = length(mod_vars), ncol = length(outtimes_plot)) - dimnames(o_plot) <- list(mod_vars, outtimes_plot) - out_plot <- cbind(time = outtimes_plot, t(o_plot)) - } - if (solution == "deSolve") { - out_plot <- ode( - y = odeini, - times = outtimes_plot, - func = mkindiff, - parms = odeparms) - } - out_transformed_plot <- data.frame(time = out_plot[,"time"]) - for (var in names(mkinmod$map)) { - if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") { - out_transformed_plot[var] <- out_plot[, var] - } else { - out_transformed_plot[var] <- rowSums(out_plot[, mkinmod$map[[var]]]) - } - } - out_transformed_plot <<- out_transformed_plot - - plot(0, type="n", - xlim = range(observed$time), ylim = range(observed$value, na.rm=TRUE), - xlab = "Time", ylab = "Observed") - col_obs <- pch_obs <- 1:length(obs_vars) - names(col_obs) <- names(pch_obs) <- obs_vars - for (obs_var in obs_vars) { - points(subset(observed, name == obs_var, c(time, value)), - pch = pch_obs[obs_var], col = col_obs[obs_var]) - } - matlines(out_transformed_plot$time, out_transformed_plot[-1]) - legend("topright", inset=c(0.05, 0.05), legend=obs_vars, - col=col_obs, pch=pch_obs, lty=1:length(pch_obs)) - } - - assign("cost.old", mC$model, inherits=TRUE) - } - # HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost - mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res) - - return(mC) - } - fit <-modFit(cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, ...) - + costFunctions <- CakeInternalCostFunctions(cake.model, state.ini.optim, state.ini.optim.boxnames, state.ini.fixed, + parms.fixed, observed, mkindiff = mkindiff, quiet, atol=atol, solution=solution, err=NULL) + + # modFit parameter transformations can explode if you put in parameters that are equal to a bound, so we move them away by a tiny amount. + all.optim <- ShiftAwayFromBoundaries(c(state.ini.optim, parms.optim), lower, upper) + + fit <-modFit(costFunctions$cost, all.optim, lower = lower, upper = upper, ...) + # We need to return some more data for summary and plotting fit$solution <- solution - if (solution == "eigen") { - fit$coefmat <- mkinmod$coefmat - } + if (solution == "deSolve") { fit$mkindiff <- mkindiff } - if (plot == TRUE) { - fit$out_transformed_plot = out_transformed_plot - } - + # We also need various other information for summary and plotting - fit$map <- mkinmod$map - fit$diffs <- mkinmod$diffs + fit$map <- cake.model$map + fit$diffs <- cake.model$diffs - # mkin_long_to_wide does not handle ragged data -# fit$observed <- mkin_long_to_wide(observed) - fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") - names(fit$observed) <- c("time", as.vector(unique(observed$name))) + out_predicted <- costFunctions$get.predicted() - predicted_long <- mkin_wide_to_long(out_predicted, time = "time") + predicted_long <- wide_to_long(out_predicted, time = "time") fit$predicted <- out_predicted - + # Collect initial parameter values in two dataframes fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) fit$start$type = c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim))) fit$start$lower <- lower fit$start$upper <- upper - - fit$fixed <- data.frame( - value = c(state.ini.fixed, parms.fixed)) + + fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) fit$fixed$type = c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed))) - -# # Calculate chi2 error levels according to FOCUS (2006) - fit$errmin <- CakeChi2(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini) - + + # Calculate chi2 error levels according to FOCUS (2006) + fit$errmin <- CakeChi2(cake.model, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fit$fixed) + # Calculate dissipation times DT50 and DT90 and formation fractions parms.all = c(fit$par, parms.fixed) fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), - row.names = obs_vars) - fit$extraDT50<- data.frame(DT50 = rep(NA, 2), row.names = c("k1", "k2")) - - fit$ff <- vector() - fit$SFORB <- vector() - for (obs_var in obs_vars) { - type = names(mkinmod$map[[obs_var]])[1] - - fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all,sannMaxIter) + row.names = obs_vars) + fit$extraDT50<- data.frame(k1 = rep(NA, length(names(cake.model$map))), k2 = rep(NA, length(names(cake.model$map))), row.names = names(cake.model$map)) + + for (compartment.name in names(cake.model$map)) { + type = names(cake.model$map[[compartment.name]])[1] + + fit$distimes[compartment.name, ] = CakeDT(type,compartment.name,parms.all,sannMaxIter) + fit$extraDT50[compartment.name, ] = CakeExtraDT(type, compartment.name, parms.all) } - - fit$extraDT50[ ,c("DT50")] = CakeExtraDT(type,parms.all) + + fit$ioreRepDT = CakeIORERepresentativeDT("Parent", parms.all) + fit$fomcRepDT = CakeFOMCBackCalculatedDT(parms.all) fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed) # Collect observed, predicted and residuals @@ -324,8 +161,7 @@ CakeOlsFit <- function(mkinmod, observed, data$variable <- ordered(data$variable, levels = obs_vars) fit$data <- data[order(data$variable, data$time), ] fit$atol <- atol - + class(fit) <- c("CakeFit", "mkinfit", "modFit") return(fit) -} - +} \ No newline at end of file diff --git a/CakePenalties.R b/CakePenalties.R index 1feffbe..5506b7f 100644 --- a/CakePenalties.R +++ b/CakePenalties.R @@ -1,8 +1,8 @@ # $Id$ -# The CAKE R modules are based on mkin -# CAKE (6245), by Tessella, for Syngenta: Copyright (C) 2011 Syngenta +# Some of the CAKE R modules are based on mkin +# CAKE (6245, 7247, 8361, 7414), by Tessella, for Syngenta: Copyright (C) 2011-2016 Syngenta # -# This program is free software: you can redistribute it and/or modify +# The CAKE R modules are free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. @@ -13,44 +13,44 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program. If not, see .” +# along with this program. If not, see . # Penalty function for fits that should not be accepted CakePenalties<-function(params, modelled, obs, penalty.scale = 10, ...){ - # Because the model cost is roughly proportional to points, - # make the penalties scale that way too for consistent behaviour - # with many or few data points - sum(CakePenaltiesLong(params, modelled, obs, penalty.scale, ...)$value) * dim(obs)[[1]] + # Because the model cost is roughly proportional to points, + # make the penalties scale that way too for consistent behaviour + # with many or few data points + sum(CakePenaltiesLong(params, modelled, obs, penalty.scale, ...)$value) * dim(obs)[[1]] } -CakePenaltiesLong<-function(params, modelled, obs, penalty.scale = 10, ...){ - r<-data.frame(name=character(0), value=numeric(0)) - - # Flow fractions > 1 - #------------------------------- - fails<-CakePenaltiesFF(params); - if(dim(fails)[[1]] > 0){ - #print("Penalty failures:") - #print(fails) - # Add a failure record for each flow fraction that missed - names(fails)<-c('name', 'value') - fails$value = 100*(fails$value - 1) - fails$name = lapply(fails$name, function(x) paste("FF_", x, sep="")) - r <- rbind(r, fails) - #totalPenalties <- totalPenalties + 100*sum(fails$x - 1) # Penalty for failure - } +CakePenaltiesLong<-function(params, modelled, obs, penalty.scale = 10, ...){ + r<-data.frame(name=character(0), value=numeric(0)) + + # Flow fractions > 1 + #------------------------------- + fails<-CakePenaltiesFF(params); + if(dim(fails)[[1]] > 0){ + #print("Penalty failures:") + #print(fails) + # Add a failure record for each flow fraction that missed + names(fails)<-c('name', 'value') + fails$value = 100*(fails$value - 1) + fails$name = lapply(fails$name, function(x) paste("FF_", x, sep="")) + r <- rbind(r, fails) + #totalPenalties <- totalPenalties + 100*sum(fails$x - 1) # Penalty for failure + } - r$value <- r$value * penalty.scale - return(r) + r$value <- r$value * penalty.scale + return(r) } CakePenaltiesFF<-function(params){ - ff.values<-params["f_"==substr(names(params), 0, 2)] # Flow fractions - if(length(ff.values) > 0){ - ffs<-data.frame(t(sapply(strsplit(names(ff.values), '_'), FUN=function(x){ x[c(2,4)] }))) # Split into source/dest - names(ffs)<-c('source', 'dest') - ffs['value'] <- ff.values # Add values - sums<-aggregate(ffs$value, list(s=ffs$source), sum) # Sum of flows from each source - fails<-sums[sums$x>1.00001,] # All compartments > 1 - } else { fails<-data.frame(s=character(0), x=numeric(0)) } - return(fails) + ff.values<-params["f_"==substr(names(params), 0, 2)] # Flow fractions + if(length(ff.values) > 0){ + ffs<-data.frame(t(sapply(strsplit(names(ff.values), '_'), FUN=function(x){ x[c(2,4)] }))) # Split into source/dest + names(ffs)<-c('source', 'dest') + ffs['value'] <- ff.values # Add values + sums<-aggregate(ffs$value, list(s=ffs$source), sum) # Sum of flows from each source + fails<-sums[sums$x>1.00001,] # All compartments > 1 + } else { fails<-data.frame(s=character(0), x=numeric(0)) } + return(fails) } \ No newline at end of file diff --git a/CakePlot.R b/CakePlot.R new file mode 100755 index 0000000..1b80a4b --- /dev/null +++ b/CakePlot.R @@ -0,0 +1,222 @@ +#$Id$ +# Generates fitted curves so the GUI can plot them +# Based on code in IRLSkinfit +# Modifications developed by Tessella for Syngenta: Copyright (C) 2011-2016 Syngenta +# Tessella Project Reference: 6245, 7247, 8361, 7414 +# +# The CAKE R modules are free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +CakeDoPlot <- function(fit, xlim = range(fit$data$time), ...) +{ + t.map.names <- names(fit$map) + metabolites <- grep("[A-Z]\\d", t.map.names, value = TRUE) + t.map.rest <- setdiff(t.map.names, metabolites) + + # Generate the normal graphs. + final <- CakeSinglePlot(fit, xlim) + final_scaled <- final + + if(length(metabolites) > 0){ + for(i in 1:length(metabolites)) + { + metabolite <- metabolites[i] + + if (names(fit$map[[metabolite]])[1] != "SFO"){ + # We do not support these curves for models other than SFO (for now; see CU-79). + next + } + + fixed <- fit$fixed$value + names(fixed) <- rownames(fit$fixed) + parms.all <- c(fit$par, fixed) + ininames <- c( + rownames(subset(fit$start, type == "state")), + rownames(subset(fit$fixed, type == "state"))) + odeini <- parms.all[ininames] + + metabolite0Parameter <- metabolite + + if (!(metabolite %in% names(odeini))){ + metabolite0Parameter <- paste0(metabolite, "_0") + } + + if (odeini[[metabolite0Parameter]] != 0){ + # We do not support these curves where the initial concentration is non-zero (for now; see CU-79). + next + } + + decay_var <- paste("k", metabolite, sep="_") + + # calculate the new ffm (formation factor) and generate the two ffm scale charts + regex <- paste("f_(.+)_to", metabolite, sep="_") + decays = grep(regex, names(fit$par), value = TRUE) + + if (length(decays) != 1){ + # We do not support these curves where there is formation from more than 1 compartment (for now; see CU-79). + next + } + + ffm_fitted <- sum(fit$par[decays]) + normal <- final + ffm_scale <- NULL + + numeric_DT50 <- as.numeric(fit$distimes[metabolite,][["DT50"]]) + + if (is.na(numeric_DT50)){ + # We can't get anywhere without a numeric DT50. + next + } + + # Generate the curve for DT50=1000d and ffm as fitted. + if (decay_var %in% names(fit$par)){ + k_new <- fit$par[decay_var]*numeric_DT50/1000; + fit$par[decay_var]<- k_new + } + else{ + # If decay_var was fixed, need to modify it there. + k_new <- fit$fixed[decay_var,]["value"]*numeric_DT50/1000; + fit$fixed[decay_var,]["value"] <- k_new[decay_var,] + } + + dt50_1000_ffm_fitted <- CakeSinglePlot(fit, xlim)[metabolite] + + naming <- c(names(final), paste(metabolite, "DT50_1000_FFM_FITTED", sep="_")) + normal <- c(final, dt50_1000_ffm_fitted) + names(normal) <- naming + final <- normal + + # Generate the scaled FFM + if(ffm_fitted != 0) + { + ffm_scale <- 1 / ffm_fitted + final_scaled <- final[c("time", metabolite, paste(metabolite, "DT50_1000_FFM_FITTED", sep="_"))] + final_scaled[t.map.rest] <- NULL; + final_frame <- as.data.frame(final_scaled) + new_names <- c(paste(metabolite, "DT50_FITTED_FFM_1", sep="_"), paste(metabolite, "DT50_1000_FFM_1", sep="_")) + names(final_frame) <- c("time", new_names) + final_frame[new_names]<-final_frame[new_names]*ffm_scale; + + cat("\n") + + write.table(final_frame, quote=FALSE) + + cat("\n") + } + } + } + + cat("\n") + + write.table(final, quote=FALSE) + + cat("\n") + + # View(final) +} + +CakeSinglePlot <- function(fit, xlim = range(fit$data$time), scale_x = 1.0, ...) +{ + solution = fit$solution + if ( is.null(solution) ) { + solution <- "deSolve" + } + atol = fit$atol + if ( is.null(atol) ) { + atol = 1.0e-6 + } + + fixed <- fit$fixed$value + names(fixed) <- rownames(fit$fixed) + parms.all <- c(fit$par, fixed) + ininames <- c( + rownames(subset(fit$start, type == "state")), + rownames(subset(fit$fixed, type == "state"))) + odeini <- parms.all[ininames] + names(odeini) <- gsub("_0$", "", names(odeini)) + odenames <- c( + rownames(subset(fit$start, type == "deparm")), + rownames(subset(fit$fixed, type == "deparm"))) + odeparms <- parms.all[odenames] + odeini <- AdjustOdeInitialValues(odeini, fit, odeparms) + + outtimes <- seq(0, xlim[2], length.out=CAKE.plots.resolution) * scale_x + + # Solve the system + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } + if (solution == "analytical") { + parent.type = names(fit$map[[1]])[1] + parent.name = names(fit$diffs)[[1]] + o <- switch(parent.type, + SFO = SFO.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, sep="_"))), + FOMC = FOMC.solution(outtimes, + evalparse(parent.name), + evalparse("alpha"), evalparse("beta")), + DFOP = DFOP.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k1", parent.name, sep="_")), + evalparse(paste("k2", parent.name, sep="_")), + evalparse(paste("g", parent.name, sep="_"))), + HS = HS.solution(outtimes, + evalparse(parent.name), + evalparse("k1"), evalparse("k2"), + evalparse("tb")), + IORE = IORE.solution(outtimes, + evalparse(parent.name), + evalparse(paste("k", parent.name, sep="_")), + evalparse("N")) + ) + out <- cbind(outtimes, o) + dimnames(out) <- list(outtimes, c("time", parent.name)) + } + if (solution == "eigen") { + coefmat.num <- matrix(sapply(as.vector(fit$coefmat), evalparse), + nrow = length(odeini)) + e <- eigen(coefmat.num) + c <- solve(e$vectors, odeini) + f.out <- function(t) { + e$vectors %*% diag(exp(e$values * t), nrow=length(odeini)) %*% c + } + o <- matrix(mapply(f.out, outtimes), + nrow = length(odeini), ncol = length(outtimes)) + dimnames(o) <- list(names(odeini), NULL) + out <- cbind(time = outtimes, t(o)) + } + if (solution == "deSolve") { + out <- ode( + y = odeini, + times = outtimes, + func = fit$mkindiff, + parms = odeparms, + atol = atol + ) + } + + out_transformed <- PostProcessOdeOutput(out, fit, atol) + + # Replace values that are incalculably small with 0. + for (compartment.name in names(fit$map)) { + if (length(out_transformed[, compartment.name][!is.nan(out_transformed[, compartment.name])]) > 0) { + out_transformed[, compartment.name][is.nan(out_transformed[, compartment.name])] <- 0 + } + + out_transformed[, compartment.name][out_transformed[, compartment.name] < 0] <- 0 + } + + return(out_transformed) +} diff --git a/CakeSolutions.R b/CakeSolutions.R new file mode 100755 index 0000000..b78ab84 --- /dev/null +++ b/CakeSolutions.R @@ -0,0 +1,42 @@ +# $Id$ +# Some of the CAKE R modules are based on mkin, +# Developed by Tessella Ltd for Syngenta: Copyright (C) 2011-2016 Syngenta +# Tessella Project Reference: 6245, 7247, 8361, 7414 + +# The CAKE R modules are free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# Produces solutions to the SFO equation given times t and parameters M_0 and k. +SFO.solution <- function(t, parent.0, k) { + parent = parent.0 * exp(-k * t) +} + +# Produces solutions to the DFOP equation given times t and parameters M_0, k1, k2 and g. +DFOP.solution <- function(t, parent.0, k1, k2, g) { + parent = g * parent.0 * exp(-k1 * t) + (1 - g) * parent.0 * exp(-k2 * t) +} + +# Produces solutions to the FOMC equation given times t and parameters M_0, alpha and beta. +FOMC.solution <- function(t, parent.0, alpha, beta) { + parent = parent.0/(t/beta + 1)^alpha +} + +# Produces solutions to the HS equation given times t and parameters M_0, k1, k2 and tb. +HS.solution <- function (t, parent.0, k1, k2, tb) { + parent = ifelse(t <= tb, parent.0 * exp(-k1 * t), parent.0 * exp(-k1 * tb) * exp(-k2 * (t - tb))) +} + +# Produces solutions to the IORE equation given times t and parameters M_0, k and N. +IORE.solution <- function(t, parent.0, k, N) { + parent = (parent.0^(1 - N) - (1 - N) * k * t)^(1/(1-N)) +} \ No newline at end of file diff --git a/CakeSummary.R b/CakeSummary.R index 6c7c661..70f01e6 100644 --- a/CakeSummary.R +++ b/CakeSummary.R @@ -3,10 +3,10 @@ # and display the summary itself. # Parts modified from package mkin -# Parts Tessella (Rob Nelson/Richard Smith/Tamar Christina) for Syngenta: Copyright (C) 2011 Syngenta -# Tessella Project Reference: 6245, 7247 +# Parts Tessella for Syngenta: Copyright (C) 2011-2016 Syngenta +# Tessella Project Reference: 6245, 7247, 8361, 7414 -# This program is free software: you can redistribute it and/or modify +# The CAKE R modules are free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. @@ -17,27 +17,84 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program. If not, see .” +# along with this program. If not, see . # Calculate the extra decay times for the other k values in DFOP # and HS # Arguments # type - type of compartment # parms.all - list of parameters -CakeExtraDT<-function(type, parms.all) { +CakeExtraDT<-function(type, obs_var, parms.all) { DT50k1 = NA - DT50k2 = NA - if (!is.na(parms.all["k1"]) && !is.na(parms.all["k2"])) { - k1 = parms.all["k1"] - k2 = parms.all["k2"] - DT50k1 = log(2)/k1 - DT50k2 = log(2)/k2 + DT50k2 = NA + if (type == "DFOP"){ + k1_name = paste("k1", obs_var, sep="_") + k2_name = paste("k2", obs_var, sep="_") + + if (!is.na(parms.all[k1_name]) && !is.na(parms.all[k2_name])) { + k1 = parms.all[k1_name] + k2 = parms.all[k2_name] + DT50k1 = log(2)/k1 + DT50k2 = log(2)/k2 + } } + + if (type == "HS"){ + if (!is.na(parms.all["k1"]) && !is.na(parms.all["k2"])) { + k1 = parms.all["k1"] + k2 = parms.all["k2"] + DT50k1 = log(2)/k1 + DT50k2 = log(2)/k2 + } + } + ret<-c(DT50k1, DT50k2) names(ret)<-c("k1", "k2") ret } +# Calculate the representative decay time for an IORE model. +# Arguments +# obs_var - compartment name +# parms.all - list of parameters +CakeIORERepresentativeDT<-function(obs_var, parms.all) { + repDT = NA + + if ("N" %in% names(parms.all)){ + k_name = paste("k", obs_var, sep="_") + k_tot = parms.all[[k_name]] + Nval = parms.all[["N"]] + M0_name = paste(obs_var, "0", sep="_") + + if (!(M0_name %in% names(parms.all))){ + M0_name = obs_var + } + + M0val = parms.all[[M0_name]] + if (Nval == 1){ + repDT = log(2)/k_tot + } + else{ + repDT = (log(2)/log(10)) * ((10^(Nval - 1) - 1) * M0val^(1 - Nval))/((Nval - 1) * k_tot) + } + } + + repDT +} + +# Calculate the "back-calculated" DT50 for an FOMC model. +# Arguments +# parms.all - list of parameters +CakeFOMCBackCalculatedDT<-function(parms.all) { + repDT = NA + + if ("alpha" %in% names(parms.all) && "beta" %in% names(parms.all)){ + repDT = parms.all[["beta"]] * (10^(1/parms.all[["alpha"]]) - 1) * (log(2)/log(10)) + } + + repDT +} + # Calculate the decay times for a compartment # Arguments # type - type of compartment @@ -46,8 +103,6 @@ CakeExtraDT<-function(type, parms.all) { # sannMaxIter - the maximum amount of iterations to do for SANN CakeDT<-function(type, obs_var, parms.all, sannMaxIter) { if (type == "SFO") { - # Changed to new parameterisation - #k_names = grep(paste("k", obs_var, sep="_"), names(parms.all), value=TRUE) k_name = paste("k", obs_var, sep="_") k_tot = parms.all[k_name] DT50 = log(2)/k_tot @@ -60,24 +115,56 @@ CakeDT<-function(type, obs_var, parms.all, sannMaxIter) { DT90 = beta * (10^(1/alpha) - 1) } if (type == "DFOP") { - k1 = parms.all["k1"] - k2 = parms.all["k2"] - g = parms.all["g"] + k1 = parms.all[paste("k1", obs_var, sep="_")] + k2 = parms.all[paste("k2", obs_var, sep="_")] + g = parms.all[paste("g", obs_var, sep="_")] f <- function(t, x) { fraction <- g * exp( - k1 * t) + (1 - g) * exp( - k2 * t) (fraction - (1 - x/100))^2 } DTminBounds <- 0.001 - DTminInitial <- 1 - DT50.temp <- optim(DTminInitial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 50, lower = DTminBounds) - DT50.o = DT50.temp$par - DT50.converged = DT50.temp$convergence == 0 - DT50 = ifelse(!DT50.converged, NA, DT50.o) - DT90.temp <- optim(DTminInitial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 90, lower = DTminBounds) - DT90.o = DT90.temp$par - DT90.converged = DT90.temp$convergence == 0 - DT90 = ifelse(!DT90.converged, NA, DT90.o) + + # Determine a decent starting point for numerical iteration. The latter two terms are also lower bounds for DT50. + DT50min <- max(0.001, (1 / k1) * log(g * 2), (1 / k2) * log((1 - g) * 2)) + + # Set the starting point for the numerical solver to a be a bit smaller than the computed minimum as the R optim function seems to get confused if the min is + # greater than the answer it's trying to converge to (up to its level of accuracy). + DT50Initial <- DT50min * 0.9 + + # Results greater than 10,000 are not interesting. The R optim routine also handles very large values unreliably and can claim to converge when it is nowhere near. + if (DT50min > 10000){ + DT50 = ">10,000" + }else{ + DT50.temp <- optim(DT50Initial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 50, lower = DTminBounds) + DT50.o = DT50.temp$par + DT50.converged = DT50.temp$convergence == 0 + DT50 = ifelse(!DT50.converged || DT50.o <= 0, NA, DT50.o) + + if (DT50.converged && DT50 > 10000){ + DT50 = ">10,000" + } + } + + # Determine a decent starting point for numerical iteration. The latter two terms are also lower bounds for DT90. + DT90min <- max(0.001, (1 / k1) * log(g * 10), (1 / k2) * log((1 - g) * 10)) + + # Set the starting point for the numerical solver to a be a bit smaller than the computed minimum as the R optim function seems to get confused if the min is + # greater than the answer it's trying to converge to (up to its level of accuracy). + DT90Initial <- DT90min * 0.9 + + if (DT90min > 10000){ + DT90 = ">10,000" + }else{ + DT90.temp <- optim(DT90Initial, f, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 90, lower = DTminBounds) + DT90.o = DT90.temp$par + DT90.converged = DT90.temp$convergence == 0 + DT90 = ifelse(!DT90.converged || DT90.o <= 0, NA, DT90.o) + + if (DT90.converged && DT90 > 10000){ + DT90 = ">10,000" + } + } } if (type == "HS") { k1 = parms.all["k1"] @@ -93,34 +180,28 @@ CakeDT<-function(type, obs_var, parms.all, sannMaxIter) { DT50 <- DTx(50) DT90 <- DTx(90) } - if (type == "SFORB") { - # FOCUS kinetics (2006), p. 60 f - k_out_names = grep(paste("k", obs_var, "free", sep="_"), names(parms.all), value=TRUE) - k_out_names = setdiff(k_out_names, paste("k", obs_var, "free", "bound", sep="_")) - k_1output = sum(parms.all[k_out_names]) - k_12 = parms.all[paste("k", obs_var, "free", "bound", sep="_")] - k_21 = parms.all[paste("k", obs_var, "bound", "free", sep="_")] - - sqrt_exp = sqrt(1/4 * (k_12 + k_21 + k_1output)^2 + k_12 * k_21 - (k_12 + k_1output) * k_21) - b1 = 0.5 * (k_12 + k_21 + k_1output) + sqrt_exp - b2 = 0.5 * (k_12 + k_21 + k_1output) - sqrt_exp - - SFORB_fraction = function(t) { - ((k_12 + k_21 - b1)/(b2 - b1)) * exp(-b1 * t) + - ((k_12 + k_21 - b2)/(b1 - b2)) * exp(-b2 * t) - } - f_50 <- function(t) (SFORB_fraction(t) - 0.5)^2 - DTmin <- 0.001 - DT50.temp <- optim(DTmin, f_50, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 50, lower = DTmin) - DT50.o = DT50.temp$par - DT50.converged = DT50.temp$convergence == 0 - if (!DT50.converged) DT50 = NA else DT50 = DT50.o - f_90 <- function(t) (SFORB_fraction(t) - 0.1)^2 - DT90.temp <- optim(DTmin, f_90, method="SANN", control=list(fnscale=0.05, maxit=sannMaxIter), x = 90, lower = DTmin) - DT90.o = DT90.temp$par - DT90.converged = DT90.temp$convergence == 0 - if (!DT90.converged) DT90 = NA else DT90 = DT90.o + if (type == "IORE") { + k_name = paste("k", obs_var, sep="_") + k_tot = parms.all[k_name] + Nval = parms.all["N"] + M0_name = paste(obs_var, "0", sep="_") + + if (!(M0_name %in% names(parms.all))){ + M0_name = obs_var + } + + M0val = parms.all[M0_name] + + if (Nval == 1){ + DT50 = log(2)/k_tot + DT90 = log(10)/k_tot + } + else{ + DT50 = ((2^(Nval - 1) - 1) * M0val^(1 - Nval))/((Nval - 1) * k_tot) + DT90 = ((10^(Nval - 1) - 1) * M0val^(1 - Nval))/((Nval - 1) * k_tot) + } } + ret<-c(DT50, DT90) names(ret)<-c("DT50","DT90") ret @@ -133,68 +214,43 @@ CakeDT<-function(type, obs_var, parms.all, sannMaxIter) { # obs_vars - compartment names # parms.optim - list of fitted parameters # state,ini.optim - list of fitted initial values +# fixed - parameters that were fixed (taken from fit$fixed usually) # -CakeChi2<-function(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini) { +CakeChi2<-function(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fixed) { # Calculate chi2 error levels according to FOCUS (2006) - if(getRversion() >= '2.14.0'){ # You would usually call mkinfit to fit the data, however it seems that we've already fit the data # bundle <- mkinfit(mkinmod, observed, parms.ini = parms.optim, state.ini = state.ini) #, err='err') # Somewhere else. Calling mkinfit may result in an error during fitting. So Instead we just create - # the values that the new version of mkinerrmin expects. There is no class check so it's fine. + # the values that the new version of CakeErrMin expects. There is no class check so it's fine. bundle<-vector() bundle$predicted <- predicted_long bundle$observed <- observed bundle$obs_vars <- obs_vars bundle$par <- c(parms.optim, state.ini.optim) - errmin.overall <- mkinerrmin(bundle) + bundle$fixed <- fixed + errmin.overall <- CakeErrMin(bundle) errmin.overall - } else { - means <- aggregate(value ~ time + name, data = observed, mean, na.rm=TRUE) - errdata <- merge(means, predicted_long, by = c("time", "name"), suffixes = c("_mean", "_pred")) - errdata <- errdata[order(errdata$time, errdata$name), ] - errmin.overall <- mkinerrmin(errdata, length(parms.optim) + length(state.ini.optim)) - - errmin <- data.frame(err.min = errmin.overall$err.min, - n.optim = errmin.overall$n.optim, df = errmin.overall$df) - rownames(errmin) <- "All data" - for (obs_var in obs_vars) - { - errdata.var <- subset(errdata, name == obs_var) - n.k.optim <- length(grep(paste("k", obs_var, sep="_"), names(parms.optim))) - n.initials.optim <- length(grep(paste(obs_var, ".*", "_0", sep=""), names(state.ini.optim))) - n.optim <- n.k.optim + n.initials.optim - if ("alpha" %in% names(parms.optim)) n.optim <- n.optim + 1 - if ("beta" %in% names(parms.optim)) n.optim <- n.optim + 1 - if ("k1" %in% names(parms.optim)) n.optim <- n.optim + 1 - if ("k2" %in% names(parms.optim)) n.optim <- n.optim + 1 - if ("g" %in% names(parms.optim)) n.optim <- n.optim + 1 - if ("tb" %in% names(parms.optim)) n.optim <- n.optim + 1 - errmin.tmp <- mkinerrmin(errdata.var, n.optim) - errmin[obs_var, c("err.min", "n.optim", "df")] <- errmin.tmp - } - errmin - } } -# Compute additional statistics (rē/efficiency) of obs v pred for each compartment and the whole problem +# Compute additional statistics (r?/efficiency) of obs v pred for each compartment and the whole problem CakeAdditionalStats<-function(obsvspred){ - agg<-aggregate(obsvspred[c('observed', 'err-var', 'predicted', 'residual')], list(name=obsvspred$variable), function(x){x}, simplify=FALSE) - frames<-apply(agg, 1, as.data.frame); - names(frames)<-agg$name; - frames$all<-obsvspred - - t(sapply(frames, function(frame){ - # This function takes a data frame for a single variable and makes the stats for it - # rē: FOCUS eq 6.5 p. 97 - # Efficiency: eq 6.6 p. 99 - do <- frame$observed - mean(frame$observed) - dp <- frame$predicted - mean(frame$predicted) - r2 <- ( sum(do * dp) / sqrt(sum(do^2) * sum(dp^2)) ) ^ 2 - eff <- 1 - ( sum((frame$predicted - frame$observed)^2) / sum(do^2) ) - list(r2=r2, eff=eff) - })) + agg<-aggregate(obsvspred[c('observed', 'err-var', 'predicted', 'residual')], list(name=obsvspred$variable), function(x){x}, simplify=FALSE) + frames<-apply(agg, 1, as.data.frame); + names(frames)<-agg$name; + frames$all<-obsvspred + + t(sapply(frames, function(frame){ + # This function takes a data frame for a single variable and makes the stats for it + # r?: FOCUS eq 6.5 p. 97 + # Efficiency: eq 6.6 p. 99 + do <- frame$observed - mean(frame$observed) + dp <- frame$predicted - mean(frame$predicted) + r2 <- ( sum(do * dp) / sqrt(sum(do^2) * sum(dp^2)) ) ^ 2 + eff <- 1 - ( sum((frame$predicted - frame$observed)^2) / sum(do^2) ) + list(r2=r2, eff=eff) + })) } # Summarise a fit - used for CakeIrlsFit and CakeOlsFit @@ -202,22 +258,33 @@ summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TR param <- object$par pnames <- names(param) p <- length(param) - covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance - # covar <- try(ginv(0.5*object$hessian)) # unscaled covariance, calculate using generic inversion + + # If the Hessian comes back as the identity matrix, there are no residuals and so we can't provide statistics + if(object$hessian==diag(dim(object$hessian)[1])) { + covar=NULL + } else { + covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance + } rdf <- object$df.residual + + compartmentsFixedConcentration <- gsub("_0$", "", rownames(subset(object$fixed, type=="state"))) + + # As for chi squared values, observations at time 0 should not count towards number of degrees of freedom if the corresponding + # initial concentration is 0. + for (compartment in compartmentsFixedConcentration){ + timeZeroObservations <- subset(object$data, time==0 & variable==compartment) + + if (length(row.names(timeZeroObservations)) > 0){ + rdf <- rdf - length(row.names(timeZeroObservations)) + } + } + resvar <- object$ssr / rdf modVariance <- object$ssr / length(object$residuals) - - # if(!is.numeric(covar)){ - # covar <- try(ginv(0.5*object$hessian)) - # } if (!is.numeric(covar)) { -# message <- "Cannot estimate covariance; system is singular" -# warning(message) -# covar <- matrix(data = NA, nrow = p, ncol = p) - covar=NULL + covar=NULL } else { message <- "ok" rownames(covar) <- colnames(covar) <-pnames @@ -233,7 +300,6 @@ summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TR } else message <- "ok" if(cov && !is.null(covar)) { - # Filter the values for t-test, only apply t-test to k-values t.names <- grep("k(\\d+)|k_(.*)", names(tval), value = TRUE) t.rest <- setdiff(names(tval), t.names) @@ -241,19 +307,15 @@ summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TR t.values[t.rest] <- NA t.result <- pt(t.values, rdf, lower.tail = FALSE) - # Now set the values we're not interested in for the lower - # and upper bound we're not interested in to NA - t.param = c(param) - t.param[t.names] <- NA # calculate the 90% confidence interval alpha <- 0.10 - lci90 <- t.param + qt(alpha/2,rdf)*se - uci90 <- t.param + qt(1-alpha/2,rdf)*se + lci90 <- c(param) + qt(alpha/2,rdf)*se + uci90 <- c(param) + qt(1-alpha/2,rdf)*se # calculate the 95% confidence interval alpha <- 0.05 - lci95 <- t.param + qt(alpha/2,rdf)*se - uci95 <- t.param + qt(1-alpha/2,rdf)*se + lci95 <- c(param) + qt(alpha/2,rdf)*se + uci95 <- c(param) + qt(1-alpha/2,rdf)*se param <- cbind(param, se, tval, t.result, lci90, uci90, lci95, uci95) dimnames(param) <- list(pnames, c("Estimate", "Std. Error", @@ -282,15 +344,19 @@ summary.CakeFit <- function(object, data = TRUE, distimes = TRUE, halflives = TR ans$diffs <- object$diffs if(data) { - ans$data <- object$data - ans$additionalstats = CakeAdditionalStats(object$data) + ans$data <- object$data + ans$additionalstats = CakeAdditionalStats(object$data) } ans$start <- object$start ans$fixed <- object$fixed ans$errmin <- object$errmin ans$penalties <- object$penalties if(distimes) ans$distimes <- object$distimes - if(halflives) ans$extraDT50 <- object$extraDT50 + if(halflives){ + ans$extraDT50 <- object$extraDT50 + ans$ioreRepDT <- object$ioreRepDT + ans$fomcRepDT <- object$fomcRepDT + } if(ff) ans$ff <- object$ff if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.cakeModFit") @@ -334,20 +400,24 @@ print.summary.CakeFit <- function(x, digits = max(3, getOption("digits") - 3), . if(printextraDT50){ cat("\nHalf Lives for Extra k values:\n") print(x$extraDT50, digits=digits,...) + } + + printIoreRepDT <- !is.null(x$ioreRepDT) + if (printIoreRepDT){ + cat("\nRepresentative Half Life for IORE:", format(signif(x$ioreRepDT, digits))) } + printFomcRepDT <- !is.null(x$fomcRepDT) + if (printFomcRepDT){ + cat("\nBack-calculated Half Life for FOMC:", format(signif(x$fomcRepDT, digits))) + } + printff <- !is.null(x$ff) if(printff){ cat("\nEstimated formation fractions:\n") print(data.frame(ff = x$ff), digits=digits,...) } - printSFORB <- !is.null(x$SFORB) - if(printSFORB){ - cat("\nEstimated Eigenvalues of SFORB model(s):\n") - print(x$SFORB, digits=digits,...) - } - printcor <- !is.null(x$cov.unscaled) if (printcor){ Corr <- cov2cor(x$cov.unscaled) @@ -363,13 +433,13 @@ print.summary.CakeFit <- function(x, digits = max(3, getOption("digits") - 3), . } if(!is.null(x$additionalstats)){ - cat("\nAdditional Stats:\n"); - print(x$additionalstats, digits=digits, ...) + cat("\nAdditional Stats:\n"); + print(x$additionalstats, digits=digits, ...) } if(!(is.null(x$penalties) || 0 == dim(x$penalties)[[1]])){ - cat("\nPenalties:\n"); - print(x$penalties, digits=digits, row.names = FALSE, ...) + cat("\nPenalties:\n"); + print(x$penalties, digits=digits, row.names = FALSE, ...) } if ( !is.null(x$seed) ) { -- cgit v1.2.1