summaryrefslogtreecommitdiff
path: root/CakeMcmcFit.R
diff options
context:
space:
mode:
Diffstat (limited to 'CakeMcmcFit.R')
-rwxr-xr-x[-rw-r--r--]CakeMcmcFit.R482
1 files changed, 227 insertions, 255 deletions
diff --git a/CakeMcmcFit.R b/CakeMcmcFit.R
index be57cef..0a5bf4a 100644..100755
--- a/CakeMcmcFit.R
+++ b/CakeMcmcFit.R
@@ -1,6 +1,7 @@
# Some of the CAKE R modules are based on mkin,
# Based on mcmckinfit as modified by Bayer
-# Modifications developed by Tessella for Syngenta: Copyright (C) 2011-2020 Syngenta
+# Modifications developed by Hybrid Intelligence (formerly Tessella), part of
+# Capgemini Engineering, for Syngenta, Copyright (C) 2011-2022 Syngenta
# Tessella Project Reference: 6245, 7247, 8361, 7414, 10091
# The CAKE R modules are free software: you can redistribute it and/or modify
@@ -32,270 +33,241 @@
# dfopDtMaxIter: The maximum number of iterations to apply to DFOP DT calculation.
# control: ...
# useExtraSolver: Whether to use the extra solver for this fit (only used for the initial first fit).
-CakeMcmcFit <- function (cake.model,
- observed,
- parms.ini,
- state.ini,
- lower,
- upper,
- fixed_parms = NULL,
- fixed_initials,
- quiet = FALSE,
- niter = 1000,
- verbose = TRUE,
- seed=NULL,
- atol=1e-6,
- dfopDtMaxIter = 10000,
- control=list(),
- useExtraSolver = FALSE,
- ...)
-{
- NAind <-which(is.na(observed$value))
- mod_vars <- names(cake.model$diffs)
- observed <- subset(observed, name %in% names(cake.model$map))
- ERR <- rep(1,nrow(observed))
- observed <- cbind(observed,err=ERR)
- obs_vars <- unique(as.character(observed$name))
-
- if (is.null(names(parms.ini))) {
- names(parms.ini) <- cake.model$parms
- }
-
- mkindiff <- function(t, state, parms) {
- time <- t
- diffs <- vector()
-
- for (box in mod_vars) {
- diffname <- paste("d", box, sep = "_")
- diffs[diffname] <- with(as.list(c(time, state, parms)),
- eval(parse(text = cake.model$diffs[[box]])))
+CakeMcmcFit <- function(cake.model,
+ observed,
+ parms.ini,
+ state.ini,
+ lower,
+ upper,
+ fixed_parms = NULL,
+ fixed_initials,
+ quiet = FALSE,
+ niter = 1000,
+ verbose = TRUE,
+ seed = NULL,
+ atol = 1e-6,
+ dfopDtMaxIter = 10000,
+ control = list(),
+ useExtraSolver = FALSE) {
+
+ fit <- CakeFit("MCMC",
+ cake.model,
+ observed,
+ parms.ini,
+ state.ini,
+ lower,
+ upper,
+ fixed_parms,
+ fixed_initials,
+ quiet,
+ niter = niter,
+ verbose = verbose,
+ seed = seed,
+ atol = atol,
+ dfopDtMaxIter = dfopDtMaxIter,
+ control = control,
+ useExtraSolver = useExtraSolver)
+
+ return(fit)
+}
+
+GetMcmcSpecificSetup <- function() {
+ return(function(
+ cake.model,
+ state.ini.optim,
+ state.ini.optim.boxnames,
+ state.ini.fixed,
+ parms.fixed,
+ observed,
+ mkindiff,
+ quiet,
+ atol,
+ solution,
+ ...) {
+ seed <- list(...)$seed
+
+ costFunctions <- CakeInternalCostFunctions(cake.model, state.ini.optim, state.ini.optim.boxnames, state.ini.fixed,
+ parms.fixed, observed, mkindiff = mkindiff, quiet, atol = atol, solution = solution)
+
+ bestIteration <<- -1;
+ costWithStatus <- function(P, ...) {
+ r <- costFunctions$cost(P)
+ if (r$cost == costFunctions$get.best.cost()) {
+ bestIteration <<- costFunctions$get.calls();
+ cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n')
+ }
+
+ arguments <- list(...)
+ if (costFunctions$get.calls() <= arguments$maxCallNo) {
+ cat("MCMC Call no.", costFunctions$get.calls(), '\n')
+ }
+
+ return(r)
}
-
- return(list(c(diffs)))
- }
-
- if (is.null(names(state.ini))) {
- names(state.ini) <- mod_vars
- }
-
- parms.fixed <- parms.ini[fixed_parms]
- optim_parms <- setdiff(names(parms.ini), fixed_parms)
- parms.optim <- parms.ini[optim_parms]
- state.ini.fixed <- state.ini[fixed_initials]
- optim_initials <- setdiff(names(state.ini), fixed_initials)
- state.ini.optim <- state.ini[optim_initials]
- state.ini.optim.boxnames <- names(state.ini.optim)
-
- if (length(state.ini.optim) > 0) {
- names(state.ini.optim) <- paste(names(state.ini.optim),
- "0", sep = "_")
- }
-
- costFunctions <- CakeInternalCostFunctions(cake.model, state.ini.optim, state.ini.optim.boxnames,
- state.ini.fixed, parms.fixed, observed, mkindiff, quiet, atol=atol)
- bestIteration <- -1;
- costWithStatus <- function(P, ...){
- r <- costFunctions$cost(P)
- if(r$cost == costFunctions$get.best.cost()) {
- bestIteration<<-costFunctions$get.calls();
- cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n')
+
+ # Set the seed
+ if (is.null(seed)) {
+ # No seed so create a random one so there is something to report
+ seed <- runif(1, 0, 2 ^ 31 - 1)
}
-
- arguments <- list(...)
- if (costFunctions$get.calls() <= arguments$maxCallNo)
- {
- cat("MCMC Call no.", costFunctions$get.calls(), '\n')
+
+ seed <- as.integer(seed)
+ set.seed(seed)
+
+ return(list(costFunctions = costFunctions, costWithStatus = costWithStatus, maxIter = NULL, tol = NULL, seed = seed))
+ })
+}
+
+GetMcmcOptimisationRoutine <- function() {
+ return(function(costFunctions, costForExtraSolver, useExtraSolver, parms, lower, upper, control, ...) {
+ mcmcArgs <- list(...)
+ cake.model <- mcmcArgs$cake.model
+ costWithStatus <- mcmcArgs$costWithStatus
+ observed <- mcmcArgs$observed
+ niter <- mcmcArgs$niter
+ verbose <- mcmcArgs$verbose
+
+ # Runs a pre-fit with no weights first, followed by a weighted step (extra solver with first, not with second)
+
+ # Run optimiser, no weighting
+ fitStepResult <- RunFitStep(costFunctions$cost, costForExtraSolver, useExtraSolver, parms, lower, upper, control)
+ fit <- fitStepResult$fit
+ fitted_with_extra_solver <- fitStepResult$fitted_with_extra_solver
+
+
+ # Process extra solver output if it was used
+ if (fitted_with_extra_solver) {
+ fit <- GetFitValuesAfterExtraSolver(fit, FF)
}
- return(r)
- }
+ # One reweighted estimation
+ # Estimate the error variance(sd)
+ tmpres <- fit$residuals
+ oldERR <- observed$err
+ err <- rep(NA, length(cake.model$map))
- # Set the seed
- if ( is.null(seed) ) {
- # No seed so create a random one so there is something to report
- seed <- runif(1,0,2^31-1)
- }
-
- seed <- as.integer(seed)
- set.seed(seed)
-
- ## ############# Get Initial Paramtervalues #############
- ## Start with no weighting
- if(useExtraSolver)
- {
- a <- try(fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='Port',control=control))
-
- if(class(a) == "try-error")
- {
- fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='L-BFGS-B',control=control)
+ for (i in 1:length(cake.model$map)) {
+ box <- names(cake.model$map)[i]
+ ind <- which(names(tmpres) == box)
+ tmp <- tmpres[ind]
+ err[i] <- sd(tmp)
}
- }
- else
- {
- # modFit parameter transformations can explode if you put in parameters that are equal to a bound, so we move them away by a tiny amount.
- all.optim <- ShiftAwayFromBoundaries(c(state.ini.optim, parms.optim), lower, upper)
-
- fit <- modFit(costFunctions$cost, all.optim, lower = lower,
- upper = upper,...)
- }
-
- ## ############## One reweighted estimation ############################
- ## Estimate the error variance(sd)
- tmpres <- fit$residuals
- oldERR <- observed$err
- err <- rep(NA,length(cake.model$map))
-
- for(i in 1:length(cake.model$map)) {
- box <- names(cake.model$map)[i]
- ind <- which(names(tmpres)==box)
- tmp <- tmpres[ind]
- err[i] <- sd(tmp)
- }
-
- names(err) <- names(cake.model$map)
- ERR <- err[as.character(observed$name)]
- observed$err <-ERR
- costFunctions$set.error(ERR)
-
- olderr <- rep(1,length(cake.model$map))
- diffsigma <- sum((err-olderr)^2)
- ## At least do one iteration step to get a weighted LS
- fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...)
- ## Use this as the Input for MCMC algorithm
- ## ##########################
- fs <- summary(fit)
- cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par)
- var0 <- fit$var_ms_unweighted
- costFunctions$set.calls(0); costFunctions$reset.best.cost()
- res <- modMCMC(costWithStatus, maxCallNo=niter, fit$par,...,jump=cov0,lower=lower,upper=upper,prior=NULL,var0=var0,wvar0=0.1,niter=niter,outputlength=niter,burninlength=0,updatecov=niter,ntrydr=1,drscale=NULL,verbose=verbose)
-
- # Construct the fit object to return
- fit$start <- data.frame(initial = c(state.ini.optim, parms.optim))
- fit$start$type <- c(rep("state", length(state.ini.optim)),
- rep("deparm", length(parms.optim)))
- fit$start$lower <- lower
- fit$start$upper <- upper
- fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed))
- fit$fixed$type <- c(rep("state", length(state.ini.fixed)),
- rep("deparm", length(parms.fixed)))
-
- fit$mkindiff <- mkindiff
- fit$map <- cake.model$map
- fit$diffs <- cake.model$diffs
-
- # Replace mean from modFit with mean from modMCMC
- fnm <- function(x) mean(res$pars[,x])
- fit$par <- sapply(dimnames(res$pars)[[2]],fnm)
- fit$bestpar <- res$bestpar
- fit$costfn <- costWithStatus
-
- # Disappearence times
- parms.all <- c(fit$par, parms.fixed)
- obs_vars <- unique(as.character(observed$name))
- fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)),
- DT90 = rep(NA, length(obs_vars)), row.names = obs_vars)
- fit$extraDT50<- data.frame(k1 = rep(NA, length(names(cake.model$map))), k2 = rep(NA, length(names(cake.model$map))), row.names = names(cake.model$map))
-
- for (compartment.name in names(cake.model$map)) {
- type <- names(cake.model$map[[compartment.name]])[1]
- fit$distimes[compartment.name, ] <- CakeDT(type,compartment.name,parms.all,dfopDtMaxIter)
- fit$extraDT50[compartment.name, ] <- CakeExtraDT(type, compartment.name, parms.all)
- }
-
- fit$ioreRepDT <- CakeIORERepresentativeDT("Parent", parms.all)
- fit$fomcRepDT <- CakeFOMCBackCalculatedDT(parms.all)
-
- # Ensure initial state is at time 0
- obstimes <- unique(c(0,observed$time))
-
- # Solve the system
- out_predicted <- CakeOdeSolve(fit, obstimes, solution = "deSolve", atol)
- out_transformed <- PostProcessOdeOutput(out_predicted, cake.model, atol)
-
- fit$predicted <- out_transformed
- fit$penalties <- CakePenaltiesLong(parms.all, out_transformed, observed)
-
- predicted_long <- wide_to_long(out_transformed,time="time")
- obs_vars <- unique(as.character(observed$name))
- fit$errmin <- CakeChi2(cake.model, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fit$fixed)
-
- data<-observed
- data$err<-rep(NA,length(data$time))
- data<-merge(data, predicted_long, by=c("time","name"))
- names(data)<-c("time", "variable", "observed","err-var", "predicted")
- data$residual<-data$observed-data$predicted
- data$variable <- ordered(data$variable, levels = obs_vars)
- fit$data <- data[order(data$variable, data$time), ]
- fit$atol <- atol
- fit$topology <- cake.model$topology
-
- sq <- data$residual * data$residual
- fit$ssr <- sum(sq)
-
- fit$seed <- seed
-
- fit$res <- res
- class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit")
- return(fit)
+
+ names(err) <- names(cake.model$map)
+ ERR <- err[as.character(observed$name)]
+ observed$err <- ERR
+ costFunctions$set.error(ERR)
+
+ olderr <- rep(1, length(cake.model$map))
+ diffsigma <- sum((err - olderr) ^ 2)
+
+ ## At least do one iteration step to get a weighted LS
+ fit <- modFit(f = costFunctions$cost, p = fit$par, lower = lower, upper = upper)
+
+ # Run MCMC optimiser with output from weighted fit
+ # Apply iterative re-weighting here (iterations fixed to 1 for now):
+ # Do modMCMC as below, we should also pass in the final priors for subsequent iterations
+ # Use modMCMC average as input to next modMCMC run (as done in block 3 to get final parameters)
+ # use errors from previous step as inputs to modMCMC cov0 and var0 at each iteration
+
+ fs <- summary(fit)
+ cov0 <- if (all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled * 2.4 ^ 2 / length(fit$par)
+ var0 <- fit$var_ms_unweighted
+ costFunctions$set.calls(0);
+ costFunctions$reset.best.cost()
+ res <- modMCMC(f = costWithStatus, p = fit$par, maxCallNo = niter, jump = cov0, lower = lower, upper = upper, prior = NULL, var0 = var0, wvar0 = 0.1, niter = niter, outputlength = niter, burninlength = 0, updatecov = niter, ntrydr = 1, drscale = NULL, verbose = verbose)
+ return(list(fit = fitStepResult$fit, fitted_with_extra_solver = fitStepResult$fitted_with_extra_solver, res = res))
+ })
}
+GetMcmcSpecificWrapUp <- function() {
+ return(function(fit, ...) {
+ args <- list(...)
+ res <- args$res
+ seed <- args$seed
+ costWithStatus <- args$costWithStatus
+ observed <- args$observed
+ parms.fixed <- args$parms.fixed
+
+ # Replace mean from modFit with mean from modMCMC
+ fnm <- function(x) mean(res$pars[, x])
+ fit$par <- sapply(dimnames(res$pars)[[2]], fnm)
+ fit$bestpar <- res$bestpar
+ fit$costfn <- costWithStatus
+ parms.all <- c(fit$par, parms.fixed)
+
+ data <- observed
+ data$err <- rep(NA, length(data$time))
+
+ fit$seed <- seed
+ fit$res <- res
+
+ np <- length(parms.all)
+ fit$rank <- np
+ fit$df.residual <- length(fit$residuals) - fit$rank
+
+ class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit") # Note different class to other optimisers
+ return(list(fit = fit, parms.all = parms.all, data = data))
+ })
+}
# Summarise a fit
-summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, halflives = TRUE, ff = TRUE, cov = FALSE,...) {
- param <- object$par
- pnames <- names(param)
- p <- length(param)
- #covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance
- mcmc <- object$res
- covar <- cov(mcmc$pars)
-
- rdf <- object$df.residual
-
+# The MCMC summary is separate from the others due to the difference in the outputs of the modMCMC and modFit.
+summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, halflives = TRUE, ff = TRUE, cov = FALSE, ...) {
+ param <- object$par
+ pnames <- names(param)
+ p <- length(param)
+ #covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance
+ mcmc <- object$res
+ covar <- cov(mcmc$pars)
+
+ rdf <- object$df.residual
+
message <- "ok"
- rownames(covar) <- colnames(covar) <-pnames
-
+ rownames(covar) <- colnames(covar) <- pnames
+
#se <- sqrt(diag(covar) * resvar)
- fnse <- function(x) sd(mcmc$pars[,x]) #/sqrt(length(mcmc$pars[,x]))
- se <- sapply(dimnames(mcmc$pars)[[2]],fnse)
-
- tval <- param / se
+ fnse <- function(x) sd(mcmc$pars[, x]) #/sqrt(length(mcmc$pars[,x]))
+ se <- sapply(dimnames(mcmc$pars)[[2]], fnse)
+
+ tval <- param / se
- if (!all(object$start$lower >=0)) {
- message <- "Note that the one-sided t-test may not be appropriate if
- parameter values below zero are possible."
- warning(message)
- } else message <- "ok"
+ if (!all(object$start$lower >= 0)) {
+ message <- "Note that the one-sided t-test may not be appropriate if
+ parameter values below zero are possible."
+ warning(message)
+ } else message <- "ok"
# Filter the values for t-test, only apply t-test to k-values
- t.names <- grep("k(\\d+)|k_(.*)", names(tval), value = TRUE)
- t.rest <- setdiff(names(tval), t.names)
+ t.names <- grep("k(\\d+)|k_(.*)", names(tval), value = TRUE)
+ t.rest <- setdiff(names(tval), t.names)
t.values <- c(tval)
t.values[t.rest] <- NA
t.result <- pt(t.values, rdf, lower.tail = FALSE)
-
+
# Now set the values we're not interested in for the lower
# and upper bound we're not interested in to NA
t.param <- c(param)
t.param[t.names] <- NA
# calculate the 90% confidence interval
alpha <- 0.10
- lci90 <- t.param + qt(alpha/2,rdf)*se
- uci90 <- t.param + qt(1-alpha/2,rdf)*se
-
+ lci90 <- t.param + qt(alpha / 2, rdf) * se
+ uci90 <- t.param + qt(1 - alpha / 2, rdf) * se
+
# calculate the 95% confidence interval
alpha <- 0.05
- lci95 <- t.param + qt(alpha/2,rdf)*se
- uci95 <- t.param + qt(1-alpha/2,rdf)*se
+ lci95 <- t.param + qt(alpha / 2, rdf) * se
+ uci95 <- t.param + qt(1 - alpha / 2, rdf) * se
param <- cbind(param, se, tval, t.result, lci90, uci90, lci95, uci95)
dimnames(param) <- list(pnames, c("Estimate", "Std. Error",
"t value", "Pr(>t)", "Lower CI (90%)", "Upper CI (90%)", "Lower CI (95%)", "Upper CI (95%)"))
-
- # Residuals from mean of MCMC fit
- resvar <- object$ssr/ rdf
- modVariance <- object$ssr / length(object$data$residual)
-
- ans <- list(ssr = object$ssr,
+
+ # Residuals from mean of MCMC fit
+ resvar <- object$ssr / rdf
+ modVariance <- object$ssr / length(object$data$residual)
+
+ ans <- list(ssr = object$ssr,
residuals = object$data$residuals,
residualVariance = resvar,
sigma = sqrt(resvar),
@@ -305,24 +277,24 @@ summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, halflives
info = object$info, niter = object$iterations,
stopmess = message,
par = param)
-
- ans$diffs <- object$diffs
- ans$data <- object$data
- ans$additionalstats <- CakeAdditionalStats(object$data)
- ans$seed <- object$seed
-
- ans$start <- object$start
- ans$fixed <- object$fixed
- ans$errmin <- object$errmin
- ans$penalties <- object$penalties
- if(distimes){
- ans$distimes <- object$distimes
- ans$extraDT50 <- object$extraDT50
- ans$ioreRepDT <- object$ioreRepDT
- ans$fomcRepDT <- object$fomcRepDT
- }
- if(halflives) ans$halflives <- object$halflives
- if(ff) ans$ff <- object$ff
- class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit")
- return(ans)
+
+ ans$diffs <- object$diffs
+ ans$data <- object$data
+ ans$additionalstats <- CakeAdditionalStats(object$data)
+ ans$seed <- object$seed
+
+ ans$start <- object$start
+ ans$fixed <- object$fixed
+ ans$errmin <- object$errmin
+ ans$penalties <- object$penalties
+ if (distimes) {
+ ans$distimes <- object$distimes
+ ans$extraDT50 <- object$extraDT50
+ ans$ioreRepDT <- object$ioreRepDT
+ ans$fomcRepDT <- object$fomcRepDT
+ }
+ if (halflives) ans$halflives <- object$halflives
+ if (ff) ans$ff <- object$ff
+ class(ans) <- c("summary.CakeFit", "summary.mkinfit", "summary.modFit")
+ return(ans)
}

Contact - Imprint