From fd9ea462ba438c023e33902449429eae6d2dc28f Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 9 Nov 2022 10:08:37 +0100 Subject: Version 3.5, from a fresh installation --- CakeMcmcFit.R | 482 +++++++++++++++++++++++++++------------------------------- 1 file changed, 227 insertions(+), 255 deletions(-) mode change 100644 => 100755 CakeMcmcFit.R (limited to 'CakeMcmcFit.R') diff --git a/CakeMcmcFit.R b/CakeMcmcFit.R old mode 100644 new mode 100755 index be57cef..0a5bf4a --- a/CakeMcmcFit.R +++ b/CakeMcmcFit.R @@ -1,6 +1,7 @@ # Some of the CAKE R modules are based on mkin, # Based on mcmckinfit as modified by Bayer -# Modifications developed by Tessella for Syngenta: Copyright (C) 2011-2020 Syngenta +# Modifications developed by Hybrid Intelligence (formerly Tessella), part of +# Capgemini Engineering, for Syngenta, Copyright (C) 2011-2022 Syngenta # Tessella Project Reference: 6245, 7247, 8361, 7414, 10091 # The CAKE R modules are free software: you can redistribute it and/or modify @@ -32,270 +33,241 @@ # dfopDtMaxIter: The maximum number of iterations to apply to DFOP DT calculation. # control: ... # useExtraSolver: Whether to use the extra solver for this fit (only used for the initial first fit). -CakeMcmcFit <- function (cake.model, - observed, - parms.ini, - state.ini, - lower, - upper, - fixed_parms = NULL, - fixed_initials, - quiet = FALSE, - niter = 1000, - verbose = TRUE, - seed=NULL, - atol=1e-6, - dfopDtMaxIter = 10000, - control=list(), - useExtraSolver = FALSE, - ...) -{ - NAind <-which(is.na(observed$value)) - mod_vars <- names(cake.model$diffs) - observed <- subset(observed, name %in% names(cake.model$map)) - ERR <- rep(1,nrow(observed)) - observed <- cbind(observed,err=ERR) - obs_vars <- unique(as.character(observed$name)) - - if (is.null(names(parms.ini))) { - names(parms.ini) <- cake.model$parms - } - - mkindiff <- function(t, state, parms) { - time <- t - diffs <- vector() - - for (box in mod_vars) { - diffname <- paste("d", box, sep = "_") - diffs[diffname] <- with(as.list(c(time, state, parms)), - eval(parse(text = cake.model$diffs[[box]]))) +CakeMcmcFit <- function(cake.model, + observed, + parms.ini, + state.ini, + lower, + upper, + fixed_parms = NULL, + fixed_initials, + quiet = FALSE, + niter = 1000, + verbose = TRUE, + seed = NULL, + atol = 1e-6, + dfopDtMaxIter = 10000, + control = list(), + useExtraSolver = FALSE) { + + fit <- CakeFit("MCMC", + cake.model, + observed, + parms.ini, + state.ini, + lower, + upper, + fixed_parms, + fixed_initials, + quiet, + niter = niter, + verbose = verbose, + seed = seed, + atol = atol, + dfopDtMaxIter = dfopDtMaxIter, + control = control, + useExtraSolver = useExtraSolver) + + return(fit) +} + +GetMcmcSpecificSetup <- function() { + return(function( + cake.model, + state.ini.optim, + state.ini.optim.boxnames, + state.ini.fixed, + parms.fixed, + observed, + mkindiff, + quiet, + atol, + solution, + ...) { + seed <- list(...)$seed + + costFunctions <- CakeInternalCostFunctions(cake.model, state.ini.optim, state.ini.optim.boxnames, state.ini.fixed, + parms.fixed, observed, mkindiff = mkindiff, quiet, atol = atol, solution = solution) + + bestIteration <<- -1; + costWithStatus <- function(P, ...) { + r <- costFunctions$cost(P) + if (r$cost == costFunctions$get.best.cost()) { + bestIteration <<- costFunctions$get.calls(); + cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n') + } + + arguments <- list(...) + if (costFunctions$get.calls() <= arguments$maxCallNo) { + cat("MCMC Call no.", costFunctions$get.calls(), '\n') + } + + return(r) } - - return(list(c(diffs))) - } - - if (is.null(names(state.ini))) { - names(state.ini) <- mod_vars - } - - parms.fixed <- parms.ini[fixed_parms] - optim_parms <- setdiff(names(parms.ini), fixed_parms) - parms.optim <- parms.ini[optim_parms] - state.ini.fixed <- state.ini[fixed_initials] - optim_initials <- setdiff(names(state.ini), fixed_initials) - state.ini.optim <- state.ini[optim_initials] - state.ini.optim.boxnames <- names(state.ini.optim) - - if (length(state.ini.optim) > 0) { - names(state.ini.optim) <- paste(names(state.ini.optim), - "0", sep = "_") - } - - costFunctions <- CakeInternalCostFunctions(cake.model, state.ini.optim, state.ini.optim.boxnames, - state.ini.fixed, parms.fixed, observed, mkindiff, quiet, atol=atol) - bestIteration <- -1; - costWithStatus <- function(P, ...){ - r <- costFunctions$cost(P) - if(r$cost == costFunctions$get.best.cost()) { - bestIteration<<-costFunctions$get.calls(); - cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n') + + # Set the seed + if (is.null(seed)) { + # No seed so create a random one so there is something to report + seed <- runif(1, 0, 2 ^ 31 - 1) } - - arguments <- list(...) - if (costFunctions$get.calls() <= arguments$maxCallNo) - { - cat("MCMC Call no.", costFunctions$get.calls(), '\n') + + seed <- as.integer(seed) + set.seed(seed) + + return(list(costFunctions = costFunctions, costWithStatus = costWithStatus, maxIter = NULL, tol = NULL, seed = seed)) + }) +} + +GetMcmcOptimisationRoutine <- function() { + return(function(costFunctions, costForExtraSolver, useExtraSolver, parms, lower, upper, control, ...) { + mcmcArgs <- list(...) + cake.model <- mcmcArgs$cake.model + costWithStatus <- mcmcArgs$costWithStatus + observed <- mcmcArgs$observed + niter <- mcmcArgs$niter + verbose <- mcmcArgs$verbose + + # Runs a pre-fit with no weights first, followed by a weighted step (extra solver with first, not with second) + + # Run optimiser, no weighting + fitStepResult <- RunFitStep(costFunctions$cost, costForExtraSolver, useExtraSolver, parms, lower, upper, control) + fit <- fitStepResult$fit + fitted_with_extra_solver <- fitStepResult$fitted_with_extra_solver + + + # Process extra solver output if it was used + if (fitted_with_extra_solver) { + fit <- GetFitValuesAfterExtraSolver(fit, FF) } - return(r) - } + # One reweighted estimation + # Estimate the error variance(sd) + tmpres <- fit$residuals + oldERR <- observed$err + err <- rep(NA, length(cake.model$map)) - # Set the seed - if ( is.null(seed) ) { - # No seed so create a random one so there is something to report - seed <- runif(1,0,2^31-1) - } - - seed <- as.integer(seed) - set.seed(seed) - - ## ############# Get Initial Paramtervalues ############# - ## Start with no weighting - if(useExtraSolver) - { - a <- try(fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='Port',control=control)) - - if(class(a) == "try-error") - { - fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='L-BFGS-B',control=control) + for (i in 1:length(cake.model$map)) { + box <- names(cake.model$map)[i] + ind <- which(names(tmpres) == box) + tmp <- tmpres[ind] + err[i] <- sd(tmp) } - } - else - { - # modFit parameter transformations can explode if you put in parameters that are equal to a bound, so we move them away by a tiny amount. - all.optim <- ShiftAwayFromBoundaries(c(state.ini.optim, parms.optim), lower, upper) - - fit <- modFit(costFunctions$cost, all.optim, lower = lower, - upper = upper,...) - } - - ## ############## One reweighted estimation ############################ - ## Estimate the error variance(sd) - tmpres <- fit$residuals - oldERR <- observed$err - err <- rep(NA,length(cake.model$map)) - - for(i in 1:length(cake.model$map)) { - box <- names(cake.model$map)[i] - ind <- which(names(tmpres)==box) - tmp <- tmpres[ind] - err[i] <- sd(tmp) - } - - names(err) <- names(cake.model$map) - ERR <- err[as.character(observed$name)] - observed$err <-ERR - costFunctions$set.error(ERR) - - olderr <- rep(1,length(cake.model$map)) - diffsigma <- sum((err-olderr)^2) - ## At least do one iteration step to get a weighted LS - fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...) - ## Use this as the Input for MCMC algorithm - ## ########################## - fs <- summary(fit) - cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par) - var0 <- fit$var_ms_unweighted - costFunctions$set.calls(0); costFunctions$reset.best.cost() - res <- modMCMC(costWithStatus, maxCallNo=niter, fit$par,...,jump=cov0,lower=lower,upper=upper,prior=NULL,var0=var0,wvar0=0.1,niter=niter,outputlength=niter,burninlength=0,updatecov=niter,ntrydr=1,drscale=NULL,verbose=verbose) - - # Construct the fit object to return - fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) - fit$start$type <- c(rep("state", length(state.ini.optim)), - rep("deparm", length(parms.optim))) - fit$start$lower <- lower - fit$start$upper <- upper - fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) - fit$fixed$type <- c(rep("state", length(state.ini.fixed)), - rep("deparm", length(parms.fixed))) - - fit$mkindiff <- mkindiff - fit$map <- cake.model$map - fit$diffs <- cake.model$diffs - - # Replace mean from modFit with mean from modMCMC - fnm <- function(x) mean(res$pars[,x]) - fit$par <- sapply(dimnames(res$pars)[[2]],fnm) - fit$bestpar <- res$bestpar - fit$costfn <- costWithStatus - - # Disappearence times - parms.all <- c(fit$par, parms.fixed) - obs_vars <- unique(as.character(observed$name)) - fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), - DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) - fit$extraDT50<- data.frame(k1 = rep(NA, length(names(cake.model$map))), k2 = rep(NA, length(names(cake.model$map))), row.names = names(cake.model$map)) - - for (compartment.name in names(cake.model$map)) { - type <- names(cake.model$map[[compartment.name]])[1] - fit$distimes[compartment.name, ] <- CakeDT(type,compartment.name,parms.all,dfopDtMaxIter) - fit$extraDT50[compartment.name, ] <- CakeExtraDT(type, compartment.name, parms.all) - } - - fit$ioreRepDT <- CakeIORERepresentativeDT("Parent", parms.all) - fit$fomcRepDT <- CakeFOMCBackCalculatedDT(parms.all) - - # Ensure initial state is at time 0 - obstimes <- unique(c(0,observed$time)) - - # Solve the system - out_predicted <- CakeOdeSolve(fit, obstimes, solution = "deSolve", atol) - out_transformed <- PostProcessOdeOutput(out_predicted, cake.model, atol) - - fit$predicted <- out_transformed - fit$penalties <- CakePenaltiesLong(parms.all, out_transformed, observed) - - predicted_long <- wide_to_long(out_transformed,time="time") - obs_vars <- unique(as.character(observed$name)) - fit$errmin <- CakeChi2(cake.model, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fit$fixed) - - data<-observed - data$err<-rep(NA,length(data$time)) - data<-merge(data, predicted_long, by=c("time","name")) - names(data)<-c("time", "variable", "observed","err-var", "predicted") - data$residual<-data$observed-data$predicted - data$variable <- ordered(data$variable, levels = obs_vars) - fit$data <- data[order(data$variable, data$time), ] - fit$atol <- atol - fit$topology <- cake.model$topology - - sq <- data$residual * data$residual - fit$ssr <- sum(sq) - - fit$seed <- seed - - fit$res <- res - class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit") - return(fit) + + names(err) <- names(cake.model$map) + ERR <- err[as.character(observed$name)] + observed$err <- ERR + costFunctions$set.error(ERR) + + olderr <- rep(1, length(cake.model$map)) + diffsigma <- sum((err - olderr) ^ 2) + + ## At least do one iteration step to get a weighted LS + fit <- modFit(f = costFunctions$cost, p = fit$par, lower = lower, upper = upper) + + # Run MCMC optimiser with output from weighted fit + # Apply iterative re-weighting here (iterations fixed to 1 for now): + # Do modMCMC as below, we should also pass in the final priors for subsequent iterations + # Use modMCMC average as input to next modMCMC run (as done in block 3 to get final parameters) + # use errors from previous step as inputs to modMCMC cov0 and var0 at each iteration + + fs <- summary(fit) + cov0 <- if (all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled * 2.4 ^ 2 / length(fit$par) + var0 <- fit$var_ms_unweighted + costFunctions$set.calls(0); + costFunctions$reset.best.cost() + res <- modMCMC(f = costWithStatus, p = fit$par, maxCallNo = niter, jump = cov0, lower = lower, upper = upper, prior = NULL, var0 = var0, wvar0 = 0.1, niter = niter, outputlength = niter, burninlength = 0, updatecov = niter, ntrydr = 1, drscale = NULL, verbose = verbose) + return(list(fit = fitStepResult$fit, fitted_with_extra_solver = fitStepResult$fitted_with_extra_solver, res = res)) + }) } +GetMcmcSpecificWrapUp <- function() { + return(function(fit, ...) { + args <- list(...) + res <- args$res + seed <- args$seed + costWithStatus <- args$costWithStatus + observed <- args$observed + parms.fixed <- args$parms.fixed + + # Replace mean from modFit with mean from modMCMC + fnm <- function(x) mean(res$pars[, x]) + fit$par <- sapply(dimnames(res$pars)[[2]], fnm) + fit$bestpar <- res$bestpar + fit$costfn <- costWithStatus + parms.all <- c(fit$par, parms.fixed) + + data <- observed + data$err <- rep(NA, length(data$time)) + + fit$seed <- seed + fit$res <- res + + np <- length(parms.all) + fit$rank <- np + fit$df.residual <- length(fit$residuals) - fit$rank + + class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit") # Note different class to other optimisers + return(list(fit = fit, parms.all = parms.all, data = data)) + }) +} # Summarise a fit -summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, halflives = TRUE, ff = TRUE, cov = FALSE,...) { - param <- object$par - pnames <- names(param) - p <- length(param) - #covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance - mcmc <- object$res - covar <- cov(mcmc$pars) - - rdf <- object$df.residual - +# The MCMC summary is separate from the others due to the difference in the outputs of the modMCMC and modFit. +summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, halflives = TRUE, ff = TRUE, cov = FALSE, ...) { + param <- object$par + pnames <- names(param) + p <- length(param) + #covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance + mcmc <- object$res + covar <- cov(mcmc$pars) + + rdf <- object$df.residual + message <- "ok" - rownames(covar) <- colnames(covar) <-pnames - + rownames(covar) <- colnames(covar) <- pnames + #se <- sqrt(diag(covar) * resvar) - fnse <- function(x) sd(mcmc$pars[,x]) #/sqrt(length(mcmc$pars[,x])) - se <- sapply(dimnames(mcmc$pars)[[2]],fnse) - - tval <- param / se + fnse <- function(x) sd(mcmc$pars[, x]) #/sqrt(length(mcmc$pars[,x])) + se <- sapply(dimnames(mcmc$pars)[[2]], fnse) + + tval <- param / se - if (!all(object$start$lower >=0)) { - message <- "Note that the one-sided t-test may not be appropriate if - parameter values below zero are possible." - warning(message) - } else message <- "ok" + if (!all(object$start$lower >= 0)) { + message <- "Note that the one-sided t-test may not be appropriate if + parameter values below zero are possible." + warning(message) + } else message <- "ok" # Filter the values for t-test, only apply t-test to k-values - t.names <- grep("k(\\d+)|k_(.*)", names(tval), value = TRUE) - t.rest <- setdiff(names(tval), t.names) + t.names <- grep("k(\\d+)|k_(.*)", names(tval), value = TRUE) + t.rest <- setdiff(names(tval), t.names) t.values <- c(tval) t.values[t.rest] <- NA t.result <- pt(t.values, rdf, lower.tail = FALSE) - + # Now set the values we're not interested in for the lower # and upper bound we're not interested in to NA t.param <- c(param) t.param[t.names] <- NA # calculate the 90% confidence interval alpha <- 0.10 - lci90 <- t.param + qt(alpha/2,rdf)*se - uci90 <- t.param + qt(1-alpha/2,rdf)*se - + lci90 <- t.param + qt(alpha / 2, rdf) * se + uci90 <- t.param + qt(1 - alpha / 2, rdf) * se + # calculate the 95% confidence interval alpha <- 0.05 - lci95 <- t.param + qt(alpha/2,rdf)*se - uci95 <- t.param + qt(1-alpha/2,rdf)*se + lci95 <- t.param + qt(alpha / 2, rdf) * se + uci95 <- t.param + qt(1 - alpha / 2, rdf) * se param <- cbind(param, se, tval, t.result, lci90, uci90, lci95, uci95) dimnames(param) <- list(pnames, c("Estimate", "Std. Error", "t value", "Pr(>t)", "Lower CI (90%)", "Upper CI (90%)", "Lower CI (95%)", "Upper CI (95%)")) - - # Residuals from mean of MCMC fit - resvar <- object$ssr/ rdf - modVariance <- object$ssr / length(object$data$residual) - - ans <- list(ssr = object$ssr, + + # Residuals from mean of MCMC fit + resvar <- object$ssr / rdf + modVariance <- object$ssr / length(object$data$residual) + + ans <- list(ssr = object$ssr, residuals = object$data$residuals, residualVariance = resvar, sigma = sqrt(resvar), @@ -305,24 +277,24 @@ summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, halflives info = object$info, niter = object$iterations, stopmess = message, par = param) - - ans$diffs <- object$diffs - ans$data <- object$data - ans$additionalstats <- CakeAdditionalStats(object$data) - ans$seed <- object$seed - - ans$start <- object$start - ans$fixed <- object$fixed - ans$errmin <- object$errmin - ans$penalties <- object$penalties - if(distimes){ - ans$distimes <- object$distimes - ans$extraDT50 <- object$extraDT50 - ans$ioreRepDT <- object$ioreRepDT - ans$fomcRepDT <- object$fomcRepDT - } - if(halflives) ans$halflives <- object$halflives - if(ff) ans$ff <- object$ff - class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit") - return(ans) + + ans$diffs <- object$diffs + ans$data <- object$data + ans$additionalstats <- CakeAdditionalStats(object$data) + ans$seed <- object$seed + + ans$start <- object$start + ans$fixed <- object$fixed + ans$errmin <- object$errmin + ans$penalties <- object$penalties + if (distimes) { + ans$distimes <- object$distimes + ans$extraDT50 <- object$extraDT50 + ans$ioreRepDT <- object$ioreRepDT + ans$fomcRepDT <- object$fomcRepDT + } + if (halflives) ans$halflives <- object$halflives + if (ff) ans$ff <- object$ff + class(ans) <- c("summary.CakeFit", "summary.mkinfit", "summary.modFit") + return(ans) } -- cgit v1.2.1