diff options
Diffstat (limited to 'CakeMcmcFit.R')
-rw-r--r-- | CakeMcmcFit.R | 361 |
1 files changed, 165 insertions, 196 deletions
diff --git a/CakeMcmcFit.R b/CakeMcmcFit.R index f9a340e..65c9431 100644 --- a/CakeMcmcFit.R +++ b/CakeMcmcFit.R @@ -1,11 +1,9 @@ -# $Id$ -# The CAKE R modules are based on mkin, +# Some of the CAKE R modules are based on mkin, # Based on mcmckinfit as modified by Bayer -# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta -# Author: Rob Nelson, Tamar Christina -# Tessella Project Reference: 6245, 7247 +# Modifications developed by Tessella for Syngenta: Copyright (C) 2011-2016 Syngenta +# Tessella Project Reference: 6245, 7247, 8361, 7414 -# This program is free software: you can redistribute it and/or modify +# The CAKE R modules are free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. @@ -16,168 +14,155 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>.” +# along with this program. If not, see <http://www.gnu.org/licenses/>. -CakeMcmcFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)), - state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0, - upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1], - plot = FALSE, quiet = FALSE, err = NULL, weight = "none", - scaleVar = FALSE, commonsigma=FALSE,jump = NULL,prior = NULL, - wvar0=0.1,niter = 1000, - outputlength = niter, burninlength = 0, updatecov = niter, - ntrydr = 1, drscale = NULL, verbose = TRUE,fitstart=TRUE,seed=NULL,atol=1e-6, - sannMaxIter = 10000, control=list(), - useSolnp = FALSE, method='L-BFGS-B',...) +# Performs an Markov chain Monte Carlo least squares fit on a given CAKE model. +# +# cake.model: The model to perform the fit on (as generated by CakeModel.R). +# observed: Observation data to fit to. +# parms.ini: Initial values for the parameters being fitted. +# state.ini: Initial state (i.e. initial values for concentration, the dependent variable being modelled). +# lower: Lower bounds to apply to parameters. +# upper: Upper bound to apply to parameters. +# fixed_parms: A vector of names of parameters that are fixed to their initial values. +# fixed_initials: A vector of compartments with fixed initial concentrations. +# quiet: Whether the internal cost functions should execute more quietly than normal (less output). +# niter: The number of MCMC iterations to apply. +# atol: The tolerance to apply to the ODE solver. +# sannMaxIter: The maximum number of iterations to apply to SANN processes. +# control: ... +# useExtraSolver: Whether to use the extra solver for this fit (only used for the initial first fit). +CakeMcmcFit <- function (cake.model, + observed, + parms.ini, + state.ini, + lower, + upper, + fixed_parms = NULL, + fixed_initials, + quiet = FALSE, + niter = 1000, + verbose = TRUE, + seed=NULL, + atol=1e-6, + sannMaxIter = 10000, + control=list(), + useExtraSolver = FALSE, + ...) { - NAind <-which(is.na(observed$value)) - mod_vars <- names(mkinmod$diffs) - observed <- subset(observed, name %in% names(mkinmod$map)) + mod_vars <- names(cake.model$diffs) + observed <- subset(observed, name %in% names(cake.model$map)) ERR <- rep(1,nrow(observed)) observed <- cbind(observed,err=ERR) obs_vars = unique(as.character(observed$name)) - if (is.null(names(parms.ini))) - names(parms.ini) <- mkinmod$parms + + if (is.null(names(parms.ini))) { + names(parms.ini) <- cake.model$parms + } + mkindiff <- function(t, state, parms) { time <- t diffs <- vector() + for (box in mod_vars) { diffname <- paste("d", box, sep = "_") diffs[diffname] <- with(as.list(c(time, state, parms)), - eval(parse(text = mkinmod$diffs[[box]]))) + eval(parse(text = cake.model$diffs[[box]]))) } + return(list(c(diffs))) } - if (is.null(names(state.ini))) + + if (is.null(names(state.ini))) { names(state.ini) <- mod_vars + } + parms.fixed <- parms.ini[fixed_parms] optim_parms <- setdiff(names(parms.ini), fixed_parms) parms.optim <- parms.ini[optim_parms] state.ini.fixed <- state.ini[fixed_initials] optim_initials <- setdiff(names(state.ini), fixed_initials) state.ini.optim <- state.ini[optim_initials] - flag <- 0 state.ini.optim.boxnames <- names(state.ini.optim) + if (length(state.ini.optim) > 0) { names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep = "_") } - costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames, - state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol) - bestIteration <- -1; - costWithStatus <- function(P, ...){ - r <- costFunctions$cost(P) - if(r$cost == costFunctions$get.best.cost()) { - bestIteration<<-costFunctions$get.calls(); - cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n') - } - cat("MCMC Call no.", costFunctions$get.calls(), '\n') - return(r) - } + costFunctions <- CakeInternalCostFunctions(cake.model, state.ini.optim, state.ini.optim.boxnames, + state.ini.fixed, parms.fixed, observed, mkindiff, quiet, atol=atol) + bestIteration <- -1; + costWithStatus <- function(P, ...){ + r <- costFunctions$cost(P) + if(r$cost == costFunctions$get.best.cost()) { + bestIteration<<-costFunctions$get.calls(); + cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n') + } + cat("MCMC Call no.", costFunctions$get.calls(), '\n') + return(r) + } # Set the seed if ( is.null(seed) ) { # No seed so create a random one so there is something to report seed = runif(1,0,2^31-1) } + seed = as.integer(seed) set.seed(seed) - if(fitstart==TRUE) - { - ## ############# Get Initial Paramtervalues ############# + ## ############# Get Initial Paramtervalues ############# ## Start with no weighting - - if(useSolnp) + if(useExtraSolver) { - pnames=names(c(state.ini.optim, parms.optim)) - fn <- function(P){ - names(P) <- pnames - FF<<-cost(P) - return(FF$model)} - a <- try(fit <- solnp(c(state.ini.optim, parms.optim),fun=fn,LB=lower,UB=upper,control=control),silent=TRUE) - flag <- 1 - optimmethod <- 'solnp' + a <- try(fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='Port',control=control)) + if(class(a) == "try-error") { - print('solnp fails, try PORT or other algorithm by users choice, might take longer time. Do something else!') - warning('solnp fails, switch to PORT or other algorithm by users choice') - a <- try(fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='Port',control=control)) - flag <- 0 - - if(class(a) == "try-error") - { - fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method=method,control=control) - } - ## now using submethod already + fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='L-BFGS-B',control=control) } } else { - fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, + # modFit parameter transformations can explode if you put in parameters that are equal to a bound, so we move them away by a tiny amount. + all.optim <- ShiftAwayFromBoundaries(c(state.ini.optim, parms.optim), lower, upper) + + fit <- modFit(costFunctions$cost, all.optim, lower = lower, upper = upper,...) - } + } - if(commonsigma==TRUE){ - #observed$err <- rep(1,nrow(observed)) - if(flag==1 && useSolnp)## fit from solnp - { - ## run a small loop with 'Marq' or some other method - fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, method='Port',control=control) - } - cov0 <- summary(fit)$cov.scaled*2.4^2/length(fit$par) - costFunctions$set.calls(0); costFunctions$reset.best.cost() - res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) - #res <- modMCMC(cost,fit$par,lower=lower,upper=upper,niter=niter) - }else{ - ## ############## One reweighted estimation ############################ - ## Estimate the error variance(sd) - tmpres <- fit$residuals - oldERR <- observed$err - err <- rep(NA,length(mod_vars)) - for(i in 1:length(mod_vars)) - { - box <- mod_vars[i] - ind <- which(names(tmpres)==box) - tmp <- tmpres[ind] - err[i] <- sd(tmp) - } - names(err) <- mod_vars - ERR <- err[as.character(observed$name)] - observed$err <-ERR - costFunctions$set.error(ERR) - if(flag==1 && useSolnp)## fit from solnp - { - fit$ssr <- fit$values[length(fit$values)] - fit$residuals <-FF$residual$res - ## mean square per varaible - if (class(FF) == "modCost") { - names(fit$residuals) <- FF$residuals$name - fit$var_ms <- FF$var$SSR/FF$var$N - fit$var_ms_unscaled <- FF$var$SSR.unscaled/FF$var$N - fit$var_ms_unweighted <- FF$var$SSR.unweighted/FF$var$N - - names(fit$var_ms_unweighted) <- names(fit$var_ms_unscaled) <- - names(fit$var_ms) <- FF$var$name - } else fit$var_ms <- fit$var_ms_unweighted <- fit$var_ms_unscaled <- NA - } - olderr <- rep(1,length(mod_vars)) - diffsigma <- sum((err-olderr)^2) - ## At least do one iteration step to get a weighted LS - fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...) - ## Use this as the Input for MCMC algorithm - ## ########################## - fs <- summary(fit) - cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par) - var0 <- fit$var_ms_unweighted - costFunctions$set.calls(0); costFunctions$reset.best.cost() - res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) - + ## ############## One reweighted estimation ############################ + ## Estimate the error variance(sd) + tmpres <- fit$residuals + oldERR <- observed$err + err <- rep(NA,length(cake.model$map)) + + for(i in 1:length(cake.model$map)) { + box <- names(cake.model$map)[i] + ind <- which(names(tmpres)==box) + tmp <- tmpres[ind] + err[i] <- sd(tmp) } - }else { - stop('fitstart=FALSE code removed until needed') - } + + names(err) <- names(cake.model$map) + ERR <- err[as.character(observed$name)] + observed$err <-ERR + costFunctions$set.error(ERR) + + olderr <- rep(1,length(cake.model$map)) + diffsigma <- sum((err-olderr)^2) + ## At least do one iteration step to get a weighted LS + fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...) + ## Use this as the Input for MCMC algorithm + ## ########################## + fs <- summary(fit) + cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par) + var0 <- fit$var_ms_unweighted + costFunctions$set.calls(0); costFunctions$reset.best.cost() + res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=NULL,var0=var0,wvar0=0.1,niter=niter,outputlength=niter,burninlength=0,updatecov=niter,ntrydr=1,drscale=NULL,verbose=verbose) + # Construct the fit object to return fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) fit$start$type = c(rep("state", length(state.ini.optim)), @@ -189,92 +174,72 @@ CakeMcmcFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$ rep("deparm", length(parms.fixed))) fit$mkindiff <- mkindiff - fit$map <- mkinmod$map - fit$diffs <- mkinmod$diffs - - # mkin_long_to_wide does not handle ragged data - fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") - #names(fit$observed) <- c("time", as.vector(unique(observed$name))) - fns <- function(str) { - fields <- strsplit(str, "\\.")[[1]] - if (fields[1] == "value") { - return(fields[2]) - } - else { - return(str) - } - } - names(fit$observed) <- sapply(names(fit$observed), fns) + fit$map <- cake.model$map + fit$diffs <- cake.model$diffs - # Replace mean from modFit with mean from modMCMC - fnm <- function(x) mean(res$pars[,x]) - fit$par <- sapply(dimnames(res$pars)[[2]],fnm) - fit$bestpar <- res$bestpar - fit$costfn <- costWithStatus + # Replace mean from modFit with mean from modMCMC + fnm <- function(x) mean(res$pars[,x]) + fit$par <- sapply(dimnames(res$pars)[[2]],fnm) + fit$bestpar <- res$bestpar + fit$costfn <- costWithStatus - # Disappearence times - parms.all <- c(fit$par, parms.fixed) - obs_vars = unique(as.character(observed$name)) - fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), + # Disappearence times + parms.all <- c(fit$par, parms.fixed) + obs_vars = unique(as.character(observed$name)) + fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) - fit$extraDT50<- data.frame(DT50 = rep(NA, 2), row.names = c("k1", "k2")) - for (obs_var in obs_vars) { - type = names(mkinmod$map[[obs_var]])[1] - fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all,sannMaxIter) + fit$extraDT50<- data.frame(k1 = rep(NA, length(names(cake.model$map))), k2 = rep(NA, length(names(cake.model$map))), row.names = names(cake.model$map)) + + for (compartment.name in names(cake.model$map)) { + type = names(cake.model$map[[compartment.name]])[1] + fit$distimes[compartment.name, ] = CakeDT(type,compartment.name,parms.all,sannMaxIter) + fit$extraDT50[compartment.name, ] = CakeExtraDT(type, compartment.name, parms.all) } - - fit$extraDT50[ ,c("DT50")] = CakeExtraDT(type,parms.all) - # Fits to data - odeini <- state.ini - odeparms <- parms.all - # If this step modelled the start amount, include it. Lookup in vectors returns NA not null if missing - if(!is.na(odeparms["Parent_0"])) { odeini['Parent'] = odeparms["Parent_0"] } + fit$ioreRepDT = CakeIORERepresentativeDT("Parent", parms.all) + fit$fomcRepDT = CakeFOMCBackCalculatedDT(parms.all) + + # Fits to data + odeini <- state.ini + odeparms <- parms.all + # If this step modelled the start amount, include it. Lookup in vectors returns NA not null if missing + if(!is.na(odeparms["Parent_0"])) { odeini['Parent'] = odeparms["Parent_0"] } - # Solve the system - evalparse <- function(string) - { - eval(parse(text=string), as.list(c(odeparms, odeini))) - } + # Solve the system + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } - # Ensure initial state is at time 0 - obstimes <- unique(c(0,observed$time)) + # Ensure initial state is at time 0 + obstimes <- unique(c(0,observed$time)) - out <- ode( - y = odeini, - times = obstimes, - func = mkindiff, - parms = odeparms, - ) - - # Output transformation for models with unobserved compartments like SFORB - out_transformed <- data.frame(time = out[,"time"]) - for (var in names(mkinmod$map)) { - if(length(mkinmod$map[[var]]) == 1) { - out_transformed[var] <- out[, var] - } else { - out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) - } - } - fit$predicted <- out_transformed - fit$penalties <- CakePenaltiesLong(odeparms, out_transformed, observed) + odeini <- AdjustOdeInitialValues(odeini, cake.model, odeparms) + + out <- ode(y = odeini, times = obstimes, func = mkindiff, parms = odeparms, atol = atol) + + out_transformed <- PostProcessOdeOutput(out, cake.model, atol) + + fit$predicted <- out_transformed + fit$penalties <- CakePenaltiesLong(odeparms, out_transformed, observed) - predicted_long <- mkin_wide_to_long(out_transformed,time="time") - obs_vars = unique(as.character(observed$name)) - fit$errmin <- CakeChi2(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini) + predicted_long <- wide_to_long(out_transformed,time="time") + obs_vars = unique(as.character(observed$name)) + fit$errmin <- CakeChi2(cake.model, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fit$fixed) - data<-observed - data$err<-rep(NA,length(data$time)) - data<-merge(data, predicted_long, by=c("time","name")) - names(data)<-c("time", "variable", "observed","err-var", "predicted") - data$residual<-data$observed-data$predicted - data$variable <- ordered(data$variable, levels = obs_vars) - fit$data <- data[order(data$variable, data$time), ] + data<-observed + data$err<-rep(NA,length(data$time)) + data<-merge(data, predicted_long, by=c("time","name")) + names(data)<-c("time", "variable", "observed","err-var", "predicted") + data$residual<-data$observed-data$predicted + data$variable <- ordered(data$variable, levels = obs_vars) + fit$data <- data[order(data$variable, data$time), ] + fit$atol <- atol - sq <- data$residual * data$residual - fit$ssr <- sum(sq) + sq <- data$residual * data$residual + fit$ssr <- sum(sq) - fit$seed = seed + fit$seed = seed fit$res <- res class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit") @@ -355,10 +320,14 @@ summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, halflives ans$start <- object$start ans$fixed <- object$fixed ans$errmin <- object$errmin - if(distimes) ans$distimes <- object$distimes + if(distimes){ + ans$distimes <- object$distimes + ans$extraDT50 <- object$extraDT50 + ans$ioreRepDT <- object$ioreRepDT + ans$fomcRepDT <- object$fomcRepDT + } if(halflives) ans$halflives <- object$halflives if(ff) ans$ff <- object$ff - if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit") return(ans) } |