From be6d42ef636e8e1c9fdcfa6f8738ee10e885d75b Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 18 Oct 2017 10:17:59 +0200 Subject: Version 1.4 --- CakeMcmcFit.R | 292 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 CakeMcmcFit.R (limited to 'CakeMcmcFit.R') diff --git a/CakeMcmcFit.R b/CakeMcmcFit.R new file mode 100644 index 0000000..209f904 --- /dev/null +++ b/CakeMcmcFit.R @@ -0,0 +1,292 @@ +# $Id: CakeMcmcFit.R 216 2011-07-05 14:35:03Z nelr $ +# The CAKE R modules are based on mkin, +# Based on mcmckinfit as modified by Bayer +# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta +# Author: Rob Nelson +# Tessella Project Reference: 6245 + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .” + +CakeMcmcFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)), + state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0, + upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1], + plot = FALSE, quiet = FALSE, err = NULL, weight = "none", + scaleVar = FALSE, commonsigma=FALSE,jump = NULL,prior = NULL, + wvar0=0.1,niter = 1000, + outputlength = niter, burninlength = 0, updatecov = niter, + ntrydr = 1, drscale = NULL, verbose = TRUE,fitstart=TRUE,seed=NULL,atol=1e-6,...) +{ + + NAind <-which(is.na(observed$value)) + mod_vars <- names(mkinmod$diffs) + observed <- subset(observed, name %in% names(mkinmod$map)) + ERR <- rep(1,nrow(observed)) + observed <- cbind(observed,err=ERR) + obs_vars = unique(as.character(observed$name)) + if (is.null(names(parms.ini))) + names(parms.ini) <- mkinmod$parms + mkindiff <- function(t, state, parms) { + time <- t + diffs <- vector() + for (box in mod_vars) { + diffname <- paste("d", box, sep = "_") + diffs[diffname] <- with(as.list(c(time, state, parms)), + eval(parse(text = mkinmod$diffs[[box]]))) + } + return(list(c(diffs))) + } + if (is.null(names(state.ini))) + names(state.ini) <- mod_vars + parms.fixed <- parms.ini[fixed_parms] + optim_parms <- setdiff(names(parms.ini), fixed_parms) + parms.optim <- parms.ini[optim_parms] + state.ini.fixed <- state.ini[fixed_initials] + optim_initials <- setdiff(names(state.ini), fixed_initials) + state.ini.optim <- state.ini[optim_initials] + state.ini.optim.boxnames <- names(state.ini.optim) + if (length(state.ini.optim) > 0) { + names(state.ini.optim) <- paste(names(state.ini.optim), + "0", sep = "_") + } + + costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames, + state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol) + bestIteration <- -1; + costWithStatus <- function(P, ...){ + r <- costFunctions$cost(P) + if(r$cost == costFunctions$get.best.cost()) { + bestIteration<<-costFunctions$get.calls(); + cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n') + } + cat("MCMC Call no.", costFunctions$get.calls(), '\n') + return(r) + } + + # Set the seed + if ( is.null(seed) ) { + # No seed so create a random one so there is something to report + seed = runif(1,0,2^31-1) + } + seed = as.integer(seed) + set.seed(seed) + + if(fitstart==TRUE) + { + ## ############# Get Initial Paramtervalues ############# + ## Start with no weighting + fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, + upper = upper,...) + + if(commonsigma==TRUE){ + #observed$err <- rep(1,nrow(observed)) + cov0 <- summary(fit)$cov.scaled*2.4^2/length(fit$par) + costFunctions$set.calls(0); costFunctions$reset.best.cost() + res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) + #res <- modMCMC(cost,fit$par,lower=lower,upper=upper,niter=niter) + }else{ + ## ############## One reweighted estimation ############################ + ## Estimate the error variance(sd) + tmpres <- fit$residuals + oldERR <- observed$err + err <- rep(NA,length(mod_vars)) + for(i in 1:length(mod_vars)) + { + box <- mod_vars[i] + ind <- which(names(tmpres)==box) + tmp <- tmpres[ind] + err[i] <- sd(tmp) + } + names(err) <- mod_vars + ERR <- err[as.character(observed$name)] + observed$err <-ERR + costFunctions$set.error(ERR) + olderr <- rep(1,length(mod_vars)) + diffsigma <- sum((err-olderr)^2) + ## At least do one iteration step to get a weighted LS + fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...) + ## Use this as the Input for MCMC algorithm + ## ########################## + fs <- summary(fit) + cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par) + var0 <- fit$var_ms_unweighted + costFunctions$set.calls(0); costFunctions$reset.best.cost() + res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) + + } + }else { + stop('fitstart=FALSE code removed until needed') + } + # Construct the fit object to return + fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) + fit$start$type = c(rep("state", length(state.ini.optim)), + rep("deparm", length(parms.optim))) + fit$start$lower <- lower + fit$start$upper <- upper + fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) + fit$fixed$type = c(rep("state", length(state.ini.fixed)), + rep("deparm", length(parms.fixed))) + + fit$mkindiff <- mkindiff + fit$map <- mkinmod$map + fit$diffs <- mkinmod$diffs + + # mkin_long_to_wide does not handle ragged data + fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") + #names(fit$observed) <- c("time", as.vector(unique(observed$name))) + fns <- function(str) { + fields <- strsplit(str, "\\.")[[1]] + if (fields[1] == "value") { + return(fields[2]) + } + else { + return(str) + } + } + names(fit$observed) <- sapply(names(fit$observed), fns) + + # Replace mean from modFit with mean from modMCMC + fnm <- function(x) mean(res$pars[,x]) + fit$par <- sapply(dimnames(res$pars)[[2]],fnm) + fit$bestpar <- res$bestpar + fit$costfn <- costWithStatus + + # Disappearence times + parms.all <- c(fit$par, parms.fixed) + obs_vars = unique(as.character(observed$name)) + fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), + DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) + for (obs_var in obs_vars) { + type = names(mkinmod$map[[obs_var]])[1] + fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all) + } + + + # Fits to data + odeini <- state.ini + odeparms <- parms.all + # If this step modelled the start amount, include it. Lookup in vectors returns NA not null if missing + if(!is.na(odeparms["Parent_0"])) { odeini['Parent'] = odeparms["Parent_0"] } + + # Solve the system + evalparse <- function(string) + { + eval(parse(text=string), as.list(c(odeparms, odeini))) + } + + # Ensure initial state is at time 0 + obstimes <- unique(c(0,observed$time)) + + out <- ode( + y = odeini, + times = obstimes, + func = mkindiff, + parms = odeparms, + ) + + # Output transformation for models with unobserved compartments like SFORB + out_transformed <- data.frame(time = out[,"time"]) + for (var in names(mkinmod$map)) { + if(length(mkinmod$map[[var]]) == 1) { + out_transformed[var] <- out[, var] + } else { + out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) + } + } + fit$predicted <- out_transformed + fit$penalties <- CakePenaltiesLong(odeparms, out_transformed, observed) + + predicted_long <- mkin_wide_to_long(out_transformed,time="time") + obs_vars = unique(as.character(observed$name)) + fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim) + + data<-observed + data$err<-rep(NA,length(data$time)) + data<-merge(data, predicted_long, by=c("time","name")) + names(data)<-c("time", "variable", "observed","err-var", "predicted") + data$residual<-data$observed-data$predicted + data$variable <- ordered(data$variable, levels = obs_vars) + fit$data <- data[order(data$variable, data$time), ] + + sq <- data$residual * data$residual + fit$ssr <- sum(sq) + + fit$seed = seed + + fit$res <- res + class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit") + return(fit) +} + + +# Summarise a fit +summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, ff = TRUE, cov = FALSE,...) { + param <- object$par + pnames <- names(param) + p <- length(param) + #covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance + mcmc <- object$res + covar <- cov(mcmc$pars) + + rdf <- object$df.residual + + message <- "ok" + rownames(covar) <- colnames(covar) <-pnames + + #se <- sqrt(diag(covar) * resvar) + fnse <- function(x) sd(mcmc$pars[,x]) #/sqrt(length(mcmc$pars[,x])) + se <- sapply(dimnames(mcmc$pars)[[2]],fnse) + + tval <- param / se + + if (!all(object$start$lower >=0)) { + message <- "Note that the one-sided t-test may not be appropriate if + parameter values below zero are possible." + warning(message) + } else message <- "ok" + + # Parameter fits + alpha <- 0.05 + lci <- param + qt(alpha/2,rdf)*se + uci <- param + qt(1-alpha/2,rdf)*se + param <- cbind(param, se, tval, pt(tval, rdf, lower.tail = FALSE), lci, uci) + dimnames(param) <- list(pnames, c("Estimate", "Std. Error", "t value", "Pr(>t)", "Lower CI", "Upper CI")) + + # Residuals from mean of MCMC fit + resvar <- object$ssr/ rdf + modVariance <- object$ssr / length(object$data$residual) + + ans <- list(residuals = object$data$residuals, + residualVariance = resvar, + sigma = sqrt(resvar), + modVariance = modVariance, + df = c(p, rdf), cov.unscaled = covar, + cov.scaled = covar * resvar, + info = object$info, niter = object$iterations, + stopmess = message, + par = param) + + ans$diffs <- object$diffs + ans$data <- object$data + ans$additionalstats = CakeAdditionalStats(object$data) + ans$seed <- object$seed + + ans$start <- object$start + ans$fixed <- object$fixed + ans$errmin <- object$errmin + if(distimes) ans$distimes <- object$distimes + if(ff) ans$ff <- object$ff + if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB + class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit") + return(ans) +} -- cgit v1.2.1