summaryrefslogblamecommitdiff
path: root/CakeMcmcFit.R
blob: f9a340e846bc5f79372e4071dedf4a5b6581837b (plain) (tree)
1
2
3
4
5
6
      


                                                                                    

                                        




















                                                                                         


                                                                                 



























                                                                    
             

















                                                                                                       
 











                                                                      






























                                                                                                                                                     

                                            




                                                                                                                 




















                                                                                                                                                                                                                                                              














                                                                                 






















































                                                                                                                                                                                                                                                              
                                                                                

                                               
                                                                            

     
                                                            




































                                                                                                         
                                                                                                                          




















                                                                        
                                                                                                                     























                                                                                  
















                                                                   
                 





                                                                                                                                 























                                                              
                                                 




                                                                         
# $Id$
# The CAKE R modules are based on mkin, 
# Based on mcmckinfit as modified by Bayer
# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011  Syngenta
# Author: Rob Nelson, Tamar Christina
# Tessella Project Reference: 6245, 7247

#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
# 
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
# 
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.”

CakeMcmcFit <- function (mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)), 
    state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), lower = 0, 
    upper = Inf, fixed_parms = NULL, fixed_initials = names(mkinmod$diffs)[-1], 
    plot = FALSE, quiet = FALSE, err = NULL, weight = "none", 
    scaleVar = FALSE, commonsigma=FALSE,jump = NULL,prior = NULL,
    wvar0=0.1,niter = 1000, 
    outputlength = niter, burninlength = 0, updatecov = niter, 
    ntrydr = 1, drscale = NULL, verbose = TRUE,fitstart=TRUE,seed=NULL,atol=1e-6,
    sannMaxIter = 10000, control=list(),
    useSolnp = FALSE, method='L-BFGS-B',...) 
{

    NAind <-which(is.na(observed$value))
    mod_vars <- names(mkinmod$diffs)
    observed <- subset(observed, name %in% names(mkinmod$map))
    ERR <- rep(1,nrow(observed))
    observed <- cbind(observed,err=ERR)
    obs_vars = unique(as.character(observed$name))
    if (is.null(names(parms.ini))) 
        names(parms.ini) <- mkinmod$parms
    mkindiff <- function(t, state, parms) {
        time <- t
        diffs <- vector()
        for (box in mod_vars) {
            diffname <- paste("d", box, sep = "_")
            diffs[diffname] <- with(as.list(c(time, state, parms)), 
                eval(parse(text = mkinmod$diffs[[box]])))
        }
        return(list(c(diffs)))
    }
    if (is.null(names(state.ini))) 
        names(state.ini) <- mod_vars
    parms.fixed <- parms.ini[fixed_parms]
    optim_parms <- setdiff(names(parms.ini), fixed_parms)
    parms.optim <- parms.ini[optim_parms]
    state.ini.fixed <- state.ini[fixed_initials]
    optim_initials <- setdiff(names(state.ini), fixed_initials)
    state.ini.optim <- state.ini[optim_initials]
    flag <- 0
    state.ini.optim.boxnames <- names(state.ini.optim)
    if (length(state.ini.optim) > 0) {
        names(state.ini.optim) <- paste(names(state.ini.optim), 
            "0", sep = "_")
    }
   
   	costFunctions <- CakeInternalCostFunctions(mkinmod, state.ini.optim, state.ini.optim.boxnames, 
                        state.ini.fixed, parms.fixed, observed, mkindiff, scaleVar, quiet, atol=atol)
	bestIteration <- -1;
	costWithStatus <- function(P, ...){
		r <- costFunctions$cost(P)
		if(r$cost == costFunctions$get.best.cost()) {
			bestIteration<<-costFunctions$get.calls();
			 cat(' MCMC best so far: c', r$cost, 'it:', bestIteration, '\n')
		}
		cat("MCMC Call no.", costFunctions$get.calls(), '\n')
		return(r)
	}

    # Set the seed
    if ( is.null(seed) ) {
      # No seed so create a random one so there is something to report
      seed = runif(1,0,2^31-1)
    }
    seed = as.integer(seed)
    set.seed(seed)
    
    if(fitstart==TRUE)
      {
     ## ############# Get Initial Paramtervalues   #############
    ## Start with no weighting
    
    if(useSolnp)
    {
        pnames=names(c(state.ini.optim, parms.optim))
        fn <- function(P){
            names(P) <- pnames
            FF<<-cost(P)
            return(FF$model)}
        a <- try(fit <- solnp(c(state.ini.optim, parms.optim),fun=fn,LB=lower,UB=upper,control=control),silent=TRUE)
        flag <- 1
        optimmethod <- 'solnp'
        if(class(a) == "try-error")
        {
            print('solnp fails, try PORT or other algorithm by users choice, might take longer time. Do something else!')
            warning('solnp fails, switch to  PORT or other algorithm by users choice')
            a <- try(fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method='Port',control=control))
            flag <- 0
            
            if(class(a) == "try-error")
            {
                fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, method=method,control=control)
            }
            ## now using submethod already
        }
    }
    else
    {
        fit <- modFit(costFunctions$cost, c(state.ini.optim, parms.optim), lower = lower, 
                          upper = upper,...)
	}
    
    if(commonsigma==TRUE){
      #observed$err <- rep(1,nrow(observed))
      if(flag==1 && useSolnp)## fit from solnp
      {
          ## run a small loop with 'Marq' or some other method
          fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, method='Port',control=control)
      }
      cov0 <- summary(fit)$cov.scaled*2.4^2/length(fit$par)
	  costFunctions$set.calls(0); costFunctions$reset.best.cost()
      res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose)
      #res <- modMCMC(cost,fit$par,lower=lower,upper=upper,niter=niter)
    }else{
      ## ############## One reweighted estimation ############################ 
      ## Estimate the error variance(sd)     
      tmpres <- fit$residuals
      oldERR <- observed$err
      err <- rep(NA,length(mod_vars))
      for(i in 1:length(mod_vars))
        {
          box <- mod_vars[i]
          ind <- which(names(tmpres)==box)
          tmp <- tmpres[ind]
          err[i] <- sd(tmp)
        }
      names(err) <- mod_vars
      ERR <- err[as.character(observed$name)]
      observed$err <-ERR
	  costFunctions$set.error(ERR)
      if(flag==1 && useSolnp)## fit from solnp
      {
          fit$ssr <- fit$values[length(fit$values)]
          fit$residuals <-FF$residual$res
          ## mean square per varaible
          if (class(FF) == "modCost") {
              names(fit$residuals)  <- FF$residuals$name
              fit$var_ms            <- FF$var$SSR/FF$var$N
              fit$var_ms_unscaled   <- FF$var$SSR.unscaled/FF$var$N
              fit$var_ms_unweighted <- FF$var$SSR.unweighted/FF$var$N
      
              names(fit$var_ms_unweighted) <- names(fit$var_ms_unscaled) <-
                  names(fit$var_ms) <- FF$var$name
          } else fit$var_ms <- fit$var_ms_unweighted <- fit$var_ms_unscaled <- NA
      }
      olderr <- rep(1,length(mod_vars))
      diffsigma <- sum((err-olderr)^2)
      ## At least do one iteration step to get a weighted LS
      fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, ...)
      ## Use this as the Input for MCMC algorithm
      ## ##########################
	  fs <- summary(fit)
      cov0 <- if(all(is.na(fs$cov.scaled))) NULL else fs$cov.scaled*2.4^2/length(fit$par)
      var0 <- fit$var_ms_unweighted
	  costFunctions$set.calls(0); costFunctions$reset.best.cost()
      res <- modMCMC(costWithStatus,fit$par,...,jump=cov0,lower=lower,upper=upper,prior=prior,var0=var0,wvar0=wvar0,niter=niter,outputlength = outputlength, burninlength = burninlength, updatecov = updatecov,ntrydr=ntrydr,drscale=drscale,verbose=verbose)
   
    }
   }else {
		stop('fitstart=FALSE code removed until needed')
	}
    # Construct the fit object to return
    fit$start <- data.frame(initial = c(state.ini.optim, parms.optim))
    fit$start$type = c(rep("state", length(state.ini.optim)), 
        rep("deparm", length(parms.optim)))
    fit$start$lower <- lower
    fit$start$upper <- upper
    fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed))
    fit$fixed$type = c(rep("state", length(state.ini.fixed)), 
        rep("deparm", length(parms.fixed)))

    fit$mkindiff <- mkindiff
    fit$map <- mkinmod$map
    fit$diffs <- mkinmod$diffs

    # mkin_long_to_wide does not handle ragged data
    fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time")
    #names(fit$observed) <- c("time", as.vector(unique(observed$name)))
    fns <- function(str) {
      fields <- strsplit(str, "\\.")[[1]]
      if (fields[1] == "value") {
         return(fields[2])
      }
      else {
         return(str)
      }
    }
    names(fit$observed) <- sapply(names(fit$observed), fns)
   
   # Replace mean from modFit with mean from modMCMC
   fnm <- function(x) mean(res$pars[,x])
   fit$par <- sapply(dimnames(res$pars)[[2]],fnm)
   fit$bestpar <- res$bestpar
   fit$costfn <- costWithStatus
   
   # Disappearence times
   parms.all <- c(fit$par, parms.fixed)
   obs_vars = unique(as.character(observed$name))
   fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), 
        DT90 = rep(NA, length(obs_vars)), row.names = obs_vars)
   fit$extraDT50<- data.frame(DT50 = rep(NA, 2), row.names = c("k1", "k2"))     
    for (obs_var in obs_vars) {
        type = names(mkinmod$map[[obs_var]])[1]
        fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all,sannMaxIter)
    }

    fit$extraDT50[ ,c("DT50")] = CakeExtraDT(type,parms.all)
    
   # Fits to data
   odeini <- state.ini
   odeparms <- parms.all
   # If this step modelled the start amount, include it. Lookup in vectors returns NA not null if missing
   if(!is.na(odeparms["Parent_0"])) { odeini['Parent'] = odeparms["Parent_0"] }
   
   # Solve the system
   evalparse <- function(string)
   {
      eval(parse(text=string), as.list(c(odeparms, odeini)))
   }
  
   # Ensure initial state is at time 0
   obstimes <- unique(c(0,observed$time))

   out <- ode(
      y = odeini,
      times = obstimes,
      func = mkindiff, 
      parms = odeparms,
    )
    
   # Output transformation for models with unobserved compartments like SFORB
   out_transformed <- data.frame(time = out[,"time"])
   for (var in names(mkinmod$map)) {
      if(length(mkinmod$map[[var]]) == 1) {
         out_transformed[var] <- out[, var]
      } else {
         out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]])
      }
   }
   fit$predicted <- out_transformed
   fit$penalties <- CakePenaltiesLong(odeparms, out_transformed, observed)

   predicted_long <- mkin_wide_to_long(out_transformed,time="time")
   obs_vars = unique(as.character(observed$name))
   fit$errmin <- CakeChi2(mkinmod, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini)

   data<-observed
   data$err<-rep(NA,length(data$time))
   data<-merge(data, predicted_long, by=c("time","name"))
   names(data)<-c("time", "variable", "observed","err-var", "predicted")
   data$residual<-data$observed-data$predicted
   data$variable <- ordered(data$variable, levels = obs_vars)
   fit$data <- data[order(data$variable, data$time), ]

   sq <- data$residual * data$residual
   fit$ssr <- sum(sq)
   
   fit$seed = seed
   
    fit$res <- res
    class(fit) <- c("CakeMcmcFit", "mkinfit", "modFit")
    return(fit)
}


# Summarise a fit
summary.CakeMcmcFit <- function(object, data = TRUE, distimes = TRUE, halflives = TRUE, ff = TRUE, cov = FALSE,...) {
  param  <- object$par
  pnames <- names(param)
  p      <- length(param)
  #covar  <- try(solve(0.5*object$hessian), silent = TRUE)   # unscaled covariance
  mcmc <- object$res
  covar <- cov(mcmc$pars)

  rdf    <- object$df.residual
  
    message <- "ok"
    rownames(covar) <- colnames(covar) <-pnames
    
    #se     <- sqrt(diag(covar) * resvar)
    fnse <- function(x) sd(mcmc$pars[,x])	#/sqrt(length(mcmc$pars[,x]))
    se <- sapply(dimnames(mcmc$pars)[[2]],fnse)
    
    tval      <- param / se

  if (!all(object$start$lower >=0)) {
    message <- "Note that the one-sided t-test may not be appropriate if
      parameter values below zero are possible."
    warning(message)
  } else message <- "ok"

    # Filter the values for t-test, only apply t-test to k-values  
    t.names  <- grep("k(\\d+)|k_(.*)", names(tval), value = TRUE)
    t.rest   <- setdiff(names(tval), t.names)
    t.values <- c(tval)
    t.values[t.rest] <- NA
    t.result <- pt(t.values, rdf, lower.tail = FALSE)
    
    # Now set the values we're not interested in for the lower 
    # and upper bound we're not interested in to NA
    t.param = c(param)
    t.param[t.names] <- NA
    # calculate the 90% confidence interval
    alpha <- 0.10
    lci90 <- t.param + qt(alpha/2,rdf)*se
    uci90 <- t.param + qt(1-alpha/2,rdf)*se
    
    # calculate the 95% confidence interval
    alpha <- 0.05
    lci95 <- t.param + qt(alpha/2,rdf)*se
    uci95 <- t.param + qt(1-alpha/2,rdf)*se

    param <- cbind(param, se, tval, t.result, lci90, uci90, lci95, uci95)
    dimnames(param) <- list(pnames, c("Estimate", "Std. Error",
                                    "t value", "Pr(>t)", "Lower CI (90%)", "Upper CI (90%)", "Lower CI (95%)", "Upper CI (95%)"))
       
   # Residuals from mean of MCMC fit
   resvar <- object$ssr/ rdf
   modVariance <- object$ssr / length(object$data$residual)
  
   ans <- list(residuals = object$data$residuals,
                residualVariance = resvar,
                sigma = sqrt(resvar),
                modVariance = modVariance,
                df = c(p, rdf), cov.unscaled = covar,
                cov.scaled = covar * resvar,
                info = object$info, niter = object$iterations,
                stopmess = message,
                par = param)
    
  ans$diffs <- object$diffs
  ans$data <- object$data
  ans$additionalstats = CakeAdditionalStats(object$data)
  ans$seed <- object$seed

  ans$start <- object$start
  ans$fixed <- object$fixed
  ans$errmin <- object$errmin 
  if(distimes) ans$distimes <- object$distimes
  if(halflives) ans$halflives <- object$halflives
  if(ff) ans$ff <- object$ff
  if(length(object$SFORB) != 0) ans$SFORB <- object$SFORB
  class(ans) <- c("summary.CakeFit","summary.mkinfit", "summary.modFit") 
  return(ans)  
}

Contact - Imprint