summaryrefslogtreecommitdiff
path: root/CakeMcmcFit.R
diff options
context:
space:
mode:
Diffstat (limited to 'CakeMcmcFit.R')
-rw-r--r--CakeMcmcFit.R361
1 files changed, 165 insertions, 196 deletions
diff --git a/CakeMcmcFit.R b/CakeMcmcFit.R
index f9a340e..65c9431 100644
--- a/CakeMcmcFit.R
+++ b/CakeMcmcFit.R
@@ -1,11 +1,9 @@
-# $Id$
-# The CAKE R modules are based on mkin,
+# Some of the CAKE R modules are based on mkin,
# Based on mcmckinfit as modified by Bayer
-# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta
-# Author: Rob Nelson, Tamar Christina
-# Tessella Project Reference: 6245, 7247
+# Modifications developed by Tessella for Syngenta: Copyright (C) 2011-2016 Syngenta
+# Tessella Project Reference: 6245, 7247, 8361, 7414
-# This program is free software: you can redistribute it and/or modify
+# The CAKE R modules are free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
@@ -16,168 +14,155 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.”
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
-CakeMcmcFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)),
- state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0,
- upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1],
- plot = FALSE, quiet = FALSE, err = NULL, weight = "none",
- scaleVar = FALSE, commonsigma=FALSE,jump = NULL,prior = NULL,
- wvar0=0.1,niter = 1000,
- outputlength = niter, burninlength = 0, updatecov = niter,
- ntrydr = 1, drscale = NULL, verbose = TRUE,fitstart=TRUE,seed=NULL,atol=1e-6,
- sannMaxIter = 10000, control=list(),
- useSolnp = FALSE, method='L-BFGS-B',...)
+# Performs an Markov chain Monte Carlo least squares fit on a given CAKE model.
+#
+# cake.model: The model to perform the fit on (as generated by CakeModel.R).
+# observed: Observation data to fit to.
+# parms.ini: Initial values for the parameters being fitted.
+# state.ini: Initial state (i.e. initial values for concentration, the dependent variable being modelled).
+# lower: Lower bounds to apply to parameters.
+# upper: Upper bound to apply to parameters.
+# fixed_parms: A vector of names of parameters that are fixed to their initial values.
+# fixed_initials: A vector of compartments with fixed initial concentrations.
+# quiet: Whether the internal cost functions should execute more quietly than normal (less output).
+# niter: The number of MCMC iterations to apply.
+# atol: The tolerance to apply to the ODE solver.
+# sannMaxIter: The maximum number of iterations to apply to SANN processes.
+# control: ...
+# useExtraSolver: Whether to use the extra solver for this fit (only used for the initial first fit).
+CakeMcmcFit <- function (cake.model,
+ observed,
+ parms.ini,
+ state.ini,
+ lower,
+ upper,
+ fixed_parms = NULL,
+ fixed_initials,
+ quiet = FALSE,
+ niter = 1000,
+ verbose = TRUE,
+ seed=NULL,
+ atol=1e-6,
+ sannMaxIter = 10000,
+ control=list(),
+ useExtraSolver = FALSE,
+ ...)
{
-
NAind <-which(is.na(observed$value))
- mod_vars <- names(mkinmod$diffs)
- observed <- subset(observed, name %in% names(mkinmod$map))
+ mod_vars <- names(cake.model$diffs)
+ observed <- subset(observed, name %in% names(cake.model$map))
ERR <- rep(1,nrow(observed))
observed <- cbind(observed,err=ERR)
obs_vars = unique(as.character(observed$name))
- if (is.null(names(parms.ini)))
- names(parms.ini) <- mkinmod$parms
+
+ if (is.null(names(parms.ini))) {
+ names(parms.ini) <- cake.model$parms
+ }
+
mkindiff <- function(t, state, parms) {
time <- t
diffs <- vector()
+
for (box in mod_vars) {
diffname <- paste("d", box, sep = "_")
diffs[diffname] <- with(as.list(c(time, state, parms)),
- eval(parse(text = mkinmod$diffs[[box]])))
+ eval(parse(text = cake.model$diffs[[box]])))
}
+
return(list(c(diffs)))
}
- if (is.null(names(state.ini)))
+
+ if (is.null(names(state.ini))) {
names(state.ini) <- mod_vars
+ }
+
parms.fixed <- parms.ini[fixed_parms]
optim_parms <- setdiff(names(parms.ini), fixed_parms)
parms.optim <- parms.ini[optim_parms]
state.ini.fixed <- state.ini[fixed_initials]
optim_initials <- setdiff(names(state.ini), fixed_initials)
state.ini.optim <- state.ini[optim_initials]
- flag <- 0
state.ini.optim.boxnames <- names(state.ini.optim)
+
if (length(state.ini.optim) > 0) {
names(state.ini.optim) <- paste(names(state.ini.optim),
"0", sep = "_")
}
- costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames,
- state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol)
- bestIteration <- -1;
- costWithStatus <- function(P, ...){
- r <- costFunctions$cost(P)
- if(r$cost == costFunctions$get.best.cost()) {
- bestIteration<<-costFunctions$get.calls();
- cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n')
- }
- cat("MCMC Call no.", costFunctions$get.calls(), '\n')
- return(r)
- }
+ costFunctions <- CakeInternalCostFunctions(cake.model, state.ini.optim, state.ini.optim.boxnames,
+ state.ini.fixed, parms.fixed, observed, mkindiff, quiet, atol=atol)
+ bestIteration <- -1;
+ costWithStatus <- function(P, ...){
+ r <- costFunctions$cost(P)
+ if(r$cost == costFunctions$get.best.cost()) {
+ bestIteration<<-costFunctions$get.calls();
+ cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n')
+ }
+ cat("MCMC Call no.", costFunctions$get.calls(), '\n')
+ return(r)
+ }
# Set the seed
if ( is.null(seed) ) {
# No seed so create a random one so there is something to report
seed = runif(1,0,2^31-1)
}
+
seed = as.integer(seed)
set.seed(seed)
- if(fitstart==TRUE)
- {
- ## ############# Get Initial Paramtervalues #############
+ ## ############# Get Initial Paramtervalues #############
## Start with no weighting
-
- if(useSolnp)
+ if(useExtraSolver)
{
- pnames=names(c(state.ini.optim, parms.optim))
- fn <- function(P){
- names(P) <- pnames
- FF<<-cost(P)
- return(FF$model)}
- a <- try(fit <- solnp(c(state.ini.optim, parms.optim),fun=fn,LB=lower,UB=upper,control=control),silent=TRUE)
- flag <- 1
- optimmethod <- 'solnp'
+ a <- try(fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='Port',control=control))
+
if(class(a) == "try-error")
{
- print('solnp fails, try PORT or other algorithm by users choice, might take longer time. Do something else!')
- warning('solnp fails, switch to PORT or other algorithm by users choice')
- a <- try(fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='Port',control=control))
- flag <- 0
-
- if(class(a) == "try-error")
- {
- fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method=method,control=control)
- }
- ## now using submethod already
+ fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='L-BFGS-B',control=control)
}
}
else
{
- fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower,
+ # modFit parameter transformations can explode if you put in parameters that are equal to a bound, so we move them away by a tiny amount.
+ all.optim <- ShiftAwayFromBoundaries(c(state.ini.optim, parms.optim), lower, upper)
+
+ fit <- modFit(costFunctions$cost, all.optim, lower = lower,
upper = upper,...)
- }
+ }
- if(commonsigma==TRUE){
- #observed$err <- rep(1,nrow(observed))
- if(flag==1 && useSolnp)## fit from solnp
- {
- ## run a small loop with 'Marq' or some other method
- fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, method='Port',control=control)
- }
- cov0 <- summary(fit)$cov.scaled*2.4^2/length(fit$par)
- costFunctions$set.calls(0); costFunctions$reset.best.cost()
- res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose)
- #res <- modMCMC(cost,fit$par,lower=lower,upper=upper,niter=niter)
- }else{
- ## ############## One reweighted estimation ############################
- ## Estimate the error variance(sd)
- tmpres <- fit$residuals
- oldERR <- observed$err
- err <- rep(NA,length(mod_vars))
- for(i in 1:length(mod_vars))
- {
- box <- mod_vars[i]
- ind <- which(names(tmpres)==box)
- tmp <- tmpres[ind]
- err[i] <- sd(tmp)
- }
- names(err) <- mod_vars
- ERR <- err[as.character(observed$name)]
- observed$err <-ERR
- costFunctions$set.error(ERR)
- if(flag==1 && useSolnp)## fit from solnp
- {
- fit$ssr <- fit$values[length(fit$values)]
- fit$residuals <-FF$residual$res
- ## mean square per varaible
- if (class(FF) == "modCost") {
- names(fit$residuals) <- FF$residuals$name
- fit$var_ms <- FF$var$SSR/FF$var$N
- fit$var_ms_unscaled <- FF$var$SSR.unscaled/FF$var$N
- fit$var_ms_unweighted <- FF$var$SSR.unweighted/FF$var$N
-
- names(fit$var_ms_unweighted) <- names(fit$var_ms_unscaled) <-
- names(fit$var_ms) <- FF$var$name
- } else fit$var_ms <- fit$var_ms_unweighted <- fit$var_ms_unscaled <- NA
- }
- olderr <- rep(1,length(mod_vars))
- diffsigma <- sum((err-olderr)^2)
- ## At least do one iteration step to get a weighted LS
- fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...)
- ## Use this as the Input for MCMC algorithm
- ## ##########################
- fs <- summary(fit)
- cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par)
- var0 <- fit$var_ms_unweighted
- costFunctions$set.calls(0); costFunctions$reset.best.cost()
- res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose)
-
+ ## ############## One reweighted estimation ############################
+ ## Estimate the error variance(sd)
+ tmpres <- fit$residuals
+ oldERR <- observed$err
+ err <- rep(NA,length(cake.model$map))
+
+ for(i in 1:length(cake.model$map)) {
+ box <- names(cake.model$map)[i]
+ ind <- which(names(tmpres)==box)
+ tmp <- tmpres[ind]
+ err[i] <- sd(tmp)
}
- }else {
- stop('fitstart=FALSE code removed until needed')
- }
+
+ names(err) <- names(cake.model$map)
+ ERR <- err[as.character(observed$name)]
+ observed$err <-ERR
+ costFunctions$set.error(ERR)
+
+ olderr <- rep(1,length(cake.model$map))
+ diffsigma <- sum((err-olderr)^2)
+ ## At least do one iteration step to get a weighted LS
+ fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...)
+ ## Use this as the Input for MCMC algorithm
+ ## ##########################
+ fs <- summary(fit)
+ cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par)
+ var0 <- fit$var_ms_unweighted
+ costFunctions$set.calls(0); costFunctions$reset.best.cost()
+ res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=NULL,var0=var0,wvar0=0.1,niter=niter,outputlength=niter,burninlength=0,updatecov=niter,ntrydr=1,drscale=NULL,verbose=verbose)
+
# Construct the fit object to return
fit$start <- data.frame(initial = c(state.ini.optim, parms.optim))
fit$start$type = c(rep("state", length(state.ini.optim)),
@@ -189,92 +174,72 @@ CakeMcmcFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$
rep("deparm", length(parms.fixed)))
fit$mkindiff <- mkindiff
- fit$map <- mkinmod$map
- fit$diffs <- mkinmod$diffs
-
- # mkin_long_to_wide does not handle ragged data
- fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time")
- #names(fit$observed) <- c("time", as.vector(unique(observed$name)))
- fns <- function(str) {
- fields <- strsplit(str, "\\.")[[1]]
- if (fields[1] == "value") {
- return(fields[2])
- }
- else {
- return(str)
- }
- }
- names(fit$observed) <- sapply(names(fit$observed), fns)
+ fit$map <- cake.model$map
+ fit$diffs <- cake.model$diffs
- # Replace mean from modFit with mean from modMCMC
- fnm <- function(x) mean(res$pars[,x])
- fit$par <- sapply(dimnames(res$pars)[[2]],fnm)
- fit$bestpar <- res$bestpar
- fit$costfn <- costWithStatus
+ # Replace mean from modFit with mean from modMCMC
+ fnm <- function(x) mean(res$pars[,x])
+ fit$par <- sapply(dimnames(res$pars)[[2]],fnm)
+ fit$bestpar <- res$bestpar
+ fit$costfn <- costWithStatus
- # Disappearence times
- parms.all <- c(fit$par, parms.fixed)
- obs_vars = unique(as.character(observed$name))
- fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)),
+ # Disappearence times
+ parms.all <- c(fit$par, parms.fixed)
+ obs_vars = unique(as.character(observed$name))
+ fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)),
DT90 = rep(NA, length(obs_vars)), row.names = obs_vars)
- fit$extraDT50<- data.frame(DT50 = rep(NA, 2), row.names = c("k1", "k2"))
- for (obs_var in obs_vars) {
- type = names(mkinmod$map[[obs_var]])[1]
- fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all,sannMaxIter)
+ fit$extraDT50<- data.frame(k1 = rep(NA, length(names(cake.model$map))), k2 = rep(NA, length(names(cake.model$map))), row.names = names(cake.model$map))
+
+ for (compartment.name in names(cake.model$map)) {
+ type = names(cake.model$map[[compartment.name]])[1]
+ fit$distimes[compartment.name, ] = CakeDT(type,compartment.name,parms.all,sannMaxIter)
+ fit$extraDT50[compartment.name, ] = CakeExtraDT(type, compartment.name, parms.all)
}
-
- fit$extraDT50[ ,c("DT50")] = CakeExtraDT(type,parms.all)
- # Fits to data
- odeini <- state.ini
- odeparms <- parms.all
- # If this step modelled the start amount, include it. Lookup in vectors returns NA not null if missing
- if(!is.na(odeparms["Parent_0"])) { odeini['Parent'] = odeparms["Parent_0"] }
+ fit$ioreRepDT = CakeIORERepresentativeDT("Parent", parms.all)
+ fit$fomcRepDT = CakeFOMCBackCalculatedDT(parms.all)
+
+ # Fits to data
+ odeini <- state.ini
+ odeparms <- parms.all
+ # If this step modelled the start amount, include it. Lookup in vectors returns NA not null if missing
+ if(!is.na(odeparms["Parent_0"])) { odeini['Parent'] = odeparms["Parent_0"] }
- # Solve the system
- evalparse <- function(string)
- {
- eval(parse(text=string), as.list(c(odeparms, odeini)))
- }
+ # Solve the system
+ evalparse <- function(string)
+ {
+ eval(parse(text=string), as.list(c(odeparms, odeini)))
+ }
- # Ensure initial state is at time 0
- obstimes <- unique(c(0,observed$time))
+ # Ensure initial state is at time 0
+ obstimes <- unique(c(0,observed$time))
- out <- ode(
- y = odeini,
- times = obstimes,
- func = mkindiff,
- parms = odeparms,
- )
-
- # Output transformation for models with unobserved compartments like SFORB
- out_transformed <- data.frame(time = out[,"time"])
- for (var in names(mkinmod$map)) {
- if(length(mkinmod$map[[var]]) == 1) {
- out_transformed[var] <- out[, var]
- } else {
- out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]])
- }
- }
- fit$predicted <- out_transformed
- fit$penalties <- CakePenaltiesLong(odeparms, out_transformed, observed)
+ odeini <- AdjustOdeInitialValues(odeini, cake.model, odeparms)
+
+ out <- ode(y = odeini, times = obstimes, func = mkindiff, parms = odeparms, atol = atol)
+
+ out_transformed <- PostProcessOdeOutput(out, cake.model, atol)
+
+ fit$predicted <- out_transformed
+ fit$penalties <- CakePenaltiesLong(odeparms, out_transformed, observed)
- predicted_long <- mkin_wide_to_long(out_transformed,time="time")
- obs_vars = unique(as.character(observed$name))
- fit$errmin <- CakeChi2(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini)
+ predicted_long <- wide_to_long(out_transformed,time="time")
+ obs_vars = unique(as.character(observed$name))
+ fit$errmin <- CakeChi2(cake.model, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fit$fixed)
- data<-observed
- data$err<-rep(NA,length(data$time))
- data<-merge(data, predicted_long, by=c("time","name"))
- names(data)<-c("time", "variable", "observed","err-var", "predicted")
- data$residual<-data$observed-data$predicted
- data$variable <- ordered(data$variable, levels = obs_vars)
- fit$data <- data[order(data$variable, data$time), ]
+ data<-observed
+ data$err<-rep(NA,length(data$time))
+ data<-merge(data, predicted_long, by=c("time","name"))
+ names(data)<-c("time", "variable", "observed","err-var", "predicted")
+ data$residual<-data$observed-data$predicted
+ data$variable <- ordered(data$variable, levels = obs_vars)
+ fit$data <- data[order(data$variable, data$time), ]
+ fit$atol <- atol
- sq <- data$residual * data$residual
- fit$ssr <- sum(sq)
+ sq <- data$residual * data$residual
+ fit$ssr <- sum(sq)
- fit$seed = seed
+ fit$seed = seed
fit$res <- res
class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit")
@@ -355,10 +320,14 @@ summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, halflives
ans$start <- object$start
ans$fixed <- object$fixed
ans$errmin <- object$errmin
- if(distimes) ans$distimes <- object$distimes
+ if(distimes){
+ ans$distimes <- object$distimes
+ ans$extraDT50 <- object$extraDT50
+ ans$ioreRepDT <- object$ioreRepDT
+ ans$fomcRepDT <- object$fomcRepDT
+ }
if(halflives) ans$halflives <- object$halflives
if(ff) ans$ff <- object$ff
- if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB
class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit")
return(ans)
}

Contact - Imprint