# $Id$ # The CAKE R modules are based on mkin, # Based on mcmckinfit as modified by Bayer # Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011 Syngenta # Author: Rob Nelson, Tamar Christina # Tessella Project Reference: 6245, 7247 # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see .” CakeMcmcFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)), state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0, upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1], plot = FALSE, quiet = FALSE, err = NULL, weight = "none", scaleVar = FALSE, commonsigma=FALSE,jump = NULL,prior = NULL, wvar0=0.1,niter = 1000, outputlength = niter, burninlength = 0, updatecov = niter, ntrydr = 1, drscale = NULL, verbose = TRUE,fitstart=TRUE,seed=NULL,atol=1e-6, sannMaxIter = 10000, control=list(), useSolnp = FALSE, method='L-BFGS-B',...) { NAind <-which(is.na(observed$value)) mod_vars <- names(mkinmod$diffs) observed <- subset(observed, name %in% names(mkinmod$map)) ERR <- rep(1,nrow(observed)) observed <- cbind(observed,err=ERR) obs_vars = unique(as.character(observed$name)) if (is.null(names(parms.ini))) names(parms.ini) <- mkinmod$parms mkindiff <- function(t, state, parms) { time <- t diffs <- vector() for (box in mod_vars) { diffname <- paste("d", box, sep = "_") diffs[diffname] <- with(as.list(c(time, state, parms)), eval(parse(text = mkinmod$diffs[[box]]))) } return(list(c(diffs))) } if (is.null(names(state.ini))) names(state.ini) <- mod_vars parms.fixed <- parms.ini[fixed_parms] optim_parms <- setdiff(names(parms.ini), fixed_parms) parms.optim <- parms.ini[optim_parms] state.ini.fixed <- state.ini[fixed_initials] optim_initials <- setdiff(names(state.ini), fixed_initials) state.ini.optim <- state.ini[optim_initials] flag <- 0 state.ini.optim.boxnames <- names(state.ini.optim) if (length(state.ini.optim) > 0) { names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep = "_") } costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames, state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol) bestIteration <- -1; costWithStatus <- function(P, ...){ r <- costFunctions$cost(P) if(r$cost == costFunctions$get.best.cost()) { bestIteration<<-costFunctions$get.calls(); cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n') } cat("MCMC Call no.", costFunctions$get.calls(), '\n') return(r) } # Set the seed if ( is.null(seed) ) { # No seed so create a random one so there is something to report seed = runif(1,0,2^31-1) } seed = as.integer(seed) set.seed(seed) if(fitstart==TRUE) { ## ############# Get Initial Paramtervalues ############# ## Start with no weighting if(useSolnp) { pnames=names(c(state.ini.optim, parms.optim)) fn <- function(P){ names(P) <- pnames FF<<-cost(P) return(FF$model)} a <- try(fit <- solnp(c(state.ini.optim, parms.optim),fun=fn,LB=lower,UB=upper,control=control),silent=TRUE) flag <- 1 optimmethod <- 'solnp' if(class(a) == "try-error") { print('solnp fails, try PORT or other algorithm by users choice, might take longer time. Do something else!') warning('solnp fails, switch to PORT or other algorithm by users choice') a <- try(fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='Port',control=control)) flag <- 0 if(class(a) == "try-error") { fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method=method,control=control) } ## now using submethod already } } else { fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper,...) } if(commonsigma==TRUE){ #observed$err <- rep(1,nrow(observed)) if(flag==1 && useSolnp)## fit from solnp { ## run a small loop with 'Marq' or some other method fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, method='Port',control=control) } cov0 <- summary(fit)$cov.scaled*2.4^2/length(fit$par) costFunctions$set.calls(0); costFunctions$reset.best.cost() res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) #res <- modMCMC(cost,fit$par,lower=lower,upper=upper,niter=niter) }else{ ## ############## One reweighted estimation ############################ ## Estimate the error variance(sd) tmpres <- fit$residuals oldERR <- observed$err err <- rep(NA,length(mod_vars)) for(i in 1:length(mod_vars)) { box <- mod_vars[i] ind <- which(names(tmpres)==box) tmp <- tmpres[ind] err[i] <- sd(tmp) } names(err) <- mod_vars ERR <- err[as.character(observed$name)] observed$err <-ERR costFunctions$set.error(ERR) if(flag==1 && useSolnp)## fit from solnp { fit$ssr <- fit$values[length(fit$values)] fit$residuals <-FF$residual$res ## mean square per varaible if (class(FF) == "modCost") { names(fit$residuals) <- FF$residuals$name fit$var_ms <- FF$var$SSR/FF$var$N fit$var_ms_unscaled <- FF$var$SSR.unscaled/FF$var$N fit$var_ms_unweighted <- FF$var$SSR.unweighted/FF$var$N names(fit$var_ms_unweighted) <- names(fit$var_ms_unscaled) <- names(fit$var_ms) <- FF$var$name } else fit$var_ms <- fit$var_ms_unweighted <- fit$var_ms_unscaled <- NA } olderr <- rep(1,length(mod_vars)) diffsigma <- sum((err-olderr)^2) ## At least do one iteration step to get a weighted LS fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...) ## Use this as the Input for MCMC algorithm ## ########################## fs <- summary(fit) cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par) var0 <- fit$var_ms_unweighted costFunctions$set.calls(0); costFunctions$reset.best.cost() res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose) } }else { stop('fitstart=FALSE code removed until needed') } # Construct the fit object to return fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) fit$start$type = c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim))) fit$start$lower <- lower fit$start$upper <- upper fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) fit$fixed$type = c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed))) fit$mkindiff <- mkindiff fit$map <- mkinmod$map fit$diffs <- mkinmod$diffs # mkin_long_to_wide does not handle ragged data fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time") #names(fit$observed) <- c("time", as.vector(unique(observed$name))) fns <- function(str) { fields <- strsplit(str, "\\.")[[1]] if (fields[1] == "value") { return(fields[2]) } else { return(str) } } names(fit$observed) <- sapply(names(fit$observed), fns) # Replace mean from modFit with mean from modMCMC fnm <- function(x) mean(res$pars[,x]) fit$par <- sapply(dimnames(res$pars)[[2]],fnm) fit$bestpar <- res$bestpar fit$costfn <- costWithStatus # Disappearence times parms.all <- c(fit$par, parms.fixed) obs_vars = unique(as.character(observed$name)) fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), row.names = obs_vars) fit$extraDT50<- data.frame(DT50 = rep(NA, 2), row.names = c("k1", "k2")) for (obs_var in obs_vars) { type = names(mkinmod$map[[obs_var]])[1] fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all,sannMaxIter) } fit$extraDT50[ ,c("DT50")] = CakeExtraDT(type,parms.all) # Fits to data odeini <- state.ini odeparms <- parms.all # If this step modelled the start amount, include it. Lookup in vectors returns NA not null if missing if(!is.na(odeparms["Parent_0"])) { odeini['Parent'] = odeparms["Parent_0"] } # Solve the system evalparse <- function(string) { eval(parse(text=string), as.list(c(odeparms, odeini))) } # Ensure initial state is at time 0 obstimes <- unique(c(0,observed$time)) out <- ode( y = odeini, times = obstimes, func = mkindiff, parms = odeparms, ) # Output transformation for models with unobserved compartments like SFORB out_transformed <- data.frame(time = out[,"time"]) for (var in names(mkinmod$map)) { if(length(mkinmod$map[[var]]) == 1) { out_transformed[var] <- out[, var] } else { out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) } } fit$predicted <- out_transformed fit$penalties <- CakePenaltiesLong(odeparms, out_transformed, observed) predicted_long <- mkin_wide_to_long(out_transformed,time="time") obs_vars = unique(as.character(observed$name)) fit$errmin <- CakeChi2(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini) data<-observed data$err<-rep(NA,length(data$time)) data<-merge(data, predicted_long, by=c("time","name")) names(data)<-c("time", "variable", "observed","err-var", "predicted") data$residual<-data$observed-data$predicted data$variable <- ordered(data$variable, levels = obs_vars) fit$data <- data[order(data$variable, data$time), ] sq <- data$residual * data$residual fit$ssr <- sum(sq) fit$seed = seed fit$res <- res class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit") return(fit) } # Summarise a fit summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, halflives = TRUE, ff = TRUE, cov = FALSE,...) { param <- object$par pnames <- names(param) p <- length(param) #covar <- try(solve(0.5*object$hessian), silent = TRUE) # unscaled covariance mcmc <- object$res covar <- cov(mcmc$pars) rdf <- object$df.residual message <- "ok" rownames(covar) <- colnames(covar) <-pnames #se <- sqrt(diag(covar) * resvar) fnse <- function(x) sd(mcmc$pars[,x]) #/sqrt(length(mcmc$pars[,x])) se <- sapply(dimnames(mcmc$pars)[[2]],fnse) tval <- param / se if (!all(object$start$lower >=0)) { message <- "Note that the one-sided t-test may not be appropriate if parameter values below zero are possible." warning(message) } else message <- "ok" # Filter the values for t-test, only apply t-test to k-values t.names <- grep("k(\\d+)|k_(.*)", names(tval), value = TRUE) t.rest <- setdiff(names(tval), t.names) t.values <- c(tval) t.values[t.rest] <- NA t.result <- pt(t.values, rdf, lower.tail = FALSE) # Now set the values we're not interested in for the lower # and upper bound we're not interested in to NA t.param = c(param) t.param[t.names] <- NA # calculate the 90% confidence interval alpha <- 0.10 lci90 <- t.param + qt(alpha/2,rdf)*se uci90 <- t.param + qt(1-alpha/2,rdf)*se # calculate the 95% confidence interval alpha <- 0.05 lci95 <- t.param + qt(alpha/2,rdf)*se uci95 <- t.param + qt(1-alpha/2,rdf)*se param <- cbind(param, se, tval, t.result, lci90, uci90, lci95, uci95) dimnames(param) <- list(pnames, c("Estimate", "Std. Error", "t value", "Pr(>t)", "Lower CI (90%)", "Upper CI (90%)", "Lower CI (95%)", "Upper CI (95%)")) # Residuals from mean of MCMC fit resvar <- object$ssr/ rdf modVariance <- object$ssr / length(object$data$residual) ans <- list(residuals = object$data$residuals, residualVariance = resvar, sigma = sqrt(resvar), modVariance = modVariance, df = c(p, rdf), cov.unscaled = covar, cov.scaled = covar * resvar, info = object$info, niter = object$iterations, stopmess = message, par = param) ans$diffs <- object$diffs ans$data <- object$data ans$additionalstats = CakeAdditionalStats(object$data) ans$seed <- object$seed ans$start <- object$start ans$fixed <- object$fixed ans$errmin <- object$errmin if(distimes) ans$distimes <- object$distimes if(halflives) ans$halflives <- object$halflives if(ff) ans$ff <- object$ff if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit") return(ans) }