path: root/CakeOlsFit.R



# Originally: mkinfit.R 87 2010-12-09 07:31:59Z jranke $

# Based on code in mkinfit
# Portions Johannes Ranke 2010
# Contact: mkin-devel@lists.berlios.de
# The summary function is an adapted and extended version of summary.modFit
# from the FME package, v 1.1 by Soetart and Petzoldt, which was in turn
# inspired by summary.nls.lm

#$Id: CakeOlsFit.R 216 2011-07-05 14:35:03Z nelr $
# This version has been modified to expect SFO parameterised as k and flow fractions
# Modifications developed by Tessella Plc for Syngenta: Copyright (C) 2011  Syngenta
# Authors: Rob Nelson, Richard Smith
# Tessella Project Reference: 6245

#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
# 
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
# 
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.”

CakeOlsFit <- function(mkinmod, observed,
  parms.ini = rep(0.1, length(mkinmod$parms)),
  state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), 
  lower = 0, upper = Inf,
  fixed_parms = NULL,
  fixed_initials = names(mkinmod$diffs)[-1],
  eigen = TRUE,
  plot = FALSE, quiet = FALSE,
  err = NULL, weight = "none", scaleVar = FALSE,
  atol = 1e-6,
  ...)
{
  mod_vars <- names(mkinmod$diffs)
  # Subset dataframe with mapped (modelled) variables
  observed <- subset(observed, name %in% names(mkinmod$map))
  # Get names of observed variables
  obs_vars = unique(as.character(observed$name))

  # Name the parameters if they are not named yet
  if(is.null(names(parms.ini))) names(parms.ini) <- mkinmod$parms

  # Name the inital parameter values if they are not named yet
  if(is.null(names(state.ini))) names(state.ini) <- mod_vars

  # Parameters to be optimised
  parms.fixed <- parms.ini[fixed_parms]
  optim_parms <- setdiff(names(parms.ini), fixed_parms)
  parms.optim <- parms.ini[optim_parms]

  state.ini.fixed <- state.ini[fixed_initials]
  optim_initials <- setdiff(names(state.ini), fixed_initials)
  state.ini.optim <- state.ini[optim_initials]
  state.ini.optim.boxnames <- names(state.ini.optim)
  if(length(state.ini.optim) > 0) {
      names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_")
  }

  # Decide if the solution of the model can be based on a simple analytical
  # formula, the spectral decomposition of the matrix (fundamental system)
  # or a numeric ode solver from the deSolve package
  if (length(mkinmod$map) == 1) {
    solution = "analytical"
  } else {
    if (is.matrix(mkinmod$coefmat) & eigen) solution = "eigen"
    else solution = "deSolve"
  }

  # Create a function calculating the differentials specified by the model
  # if necessary
  if(solution == "deSolve") {
    mkindiff <- function(t, state, parms) {
      time <- t
      diffs <- vector()
      for (box in mod_vars)
      {
        diffname <- paste("d", box, sep="_")      
        diffs[diffname] <- with(as.list(c(time,state, parms)),
          eval(parse(text=mkinmod$diffs[[box]])))
      }
      return(list(c(diffs)))
    } 
  }

  cost.old <- 1e100
  calls <- 0
  out_predicted <- NA
  # Define the model cost function
  cost <- function(P)
  {
    assign("calls", calls+1, inherits=TRUE)
    if(length(state.ini.optim) > 0) {
      odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed)
      names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed))
    } else odeini <- state.ini.fixed

    odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed)

    # Ensure initial state is at time 0
    outtimes = unique(c(0,observed$time))
    evalparse <- function(string)
    {
      eval(parse(text=string), as.list(c(odeparms, odeini)))
    }

    # Solve the system
    if (solution == "analytical") {
      parent.type = names(mkinmod$map[[1]])[1]  
      parent.name = names(mkinmod$diffs)[[1]]
      o <- switch(parent.type,
        SFO = SFO.solution(outtimes, 
            evalparse(parent.name),
            evalparse(paste("k", parent.name, sep="_"))),
#            evalparse("k")),
#            evalparse(paste("k", parent.name, "sink", sep="_"))),
        FOMC = FOMC.solution(outtimes,
            evalparse(parent.name),
            evalparse("alpha"), evalparse("beta")),
        DFOP = DFOP.solution(outtimes,
            evalparse(parent.name),
            evalparse("k1"), evalparse("k2"),
            evalparse("g")),
        HS = HS.solution(outtimes,
            evalparse(parent.name),
            evalparse("k1"), evalparse("k2"),
            evalparse("tb")),
        SFORB = SFORB.solution(outtimes,
            evalparse(parent.name),
            evalparse(paste("k", parent.name, "bound", sep="_")),
            evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")),
            evalparse(paste("k", parent.name, "sink", sep="_")))
      )
      out <- cbind(outtimes, o)
      dimnames(out) <- list(outtimes, c("time", sub("_free", "", parent.name)))
    }
    if (solution == "eigen") {
      coefmat.num <- matrix(sapply(as.vector(mkinmod$coefmat), evalparse), 
        nrow = length(mod_vars))
      e <- eigen(coefmat.num)
      c <- solve(e$vectors, odeini)
      f.out <- function(t) {
        e$vectors %*% diag(exp(e$values * t), nrow=length(mod_vars)) %*% c
      }
      o <- matrix(mapply(f.out, outtimes), 
        nrow = length(mod_vars), ncol = length(outtimes))
      dimnames(o) <- list(mod_vars, outtimes)
      out <- cbind(time = outtimes, t(o))
    } 
    if (solution == "deSolve")  
    {
      out <- ode(
        y = odeini,
        times = outtimes,
        func = mkindiff, 
        parms = odeparms,
        atol = atol
      )
    }
  
    # Output transformation for models with unobserved compartments like SFORB
    out_transformed <- data.frame(time = out[,"time"])
    for (var in names(mkinmod$map)) {
      if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") {
        out_transformed[var] <- out[, var]
      } else {
        out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]])
      }
    }    
    assign("out_predicted", out_transformed, inherits=TRUE)

    mC <- CakeCost(out_transformed, observed, y = "value",
      err = err, weight = weight, scaleVar = scaleVar)
	mC$penalties <- CakePenalties(odeparms, out_transformed, observed)
	mC$model <- mC$cost + mC$penalties;
	if (mC$model < cost.old) {
		if (!quiet) 
			cat("Model cost at call ", calls, ": m", mC$cost, 'p:', mC$penalties, 'o:', mC$model,
			  "\n")

      # Plot the data and current model output if requested
      if(plot) {
        outtimes_plot = seq(min(observed$time), max(observed$time), length.out=100)
        if (solution == "analytical") {
          o_plot <- switch(parent.type,
            SFO = SFO.solution(outtimes_plot, 
                evalparse(parent.name),
                evalparse(paste("k", parent.name, sep="_"))),
#                evalparse(paste("k", parent.name, "sink", sep="_"))),
            FOMC = FOMC.solution(outtimes_plot,
                evalparse(parent.name),
                evalparse("alpha"), evalparse("beta")),
            DFOP = DFOP.solution(outtimes_plot,
                evalparse(parent.name),
                evalparse("k1"), evalparse("k2"),
                evalparse("g")),
            HS = HS.solution(outtimes_plot,
                evalparse(parent.name),
                evalparse("k1"), evalparse("k2"),
                evalparse("tb")),
            SFORB = SFORB.solution(outtimes_plot,
                evalparse(parent.name),
                evalparse(paste("k", parent.name, "bound", sep="_")),
                evalparse(paste("k", sub("free", "bound", parent.name), "free", sep="_")),
                evalparse(paste("k", parent.name, "sink", sep="_")))
          )
          out_plot <- cbind(outtimes_plot, o_plot)
          dimnames(out_plot) <- list(outtimes_plot, c("time", sub("_free", "", parent.name)))
        }
        if(solution == "eigen") {
          o_plot <- matrix(mapply(f.out, outtimes_plot), 
            nrow = length(mod_vars), ncol = length(outtimes_plot))
          dimnames(o_plot) <- list(mod_vars, outtimes_plot)
          out_plot <- cbind(time = outtimes_plot, t(o_plot))
        } 
        if (solution == "deSolve") {
          out_plot <- ode(
            y = odeini,
            times = outtimes_plot,
            func = mkindiff, 
            parms = odeparms)
        }
        out_transformed_plot <- data.frame(time = out_plot[,"time"])
        for (var in names(mkinmod$map)) {
          if((length(mkinmod$map[[var]]) == 1) || solution == "analytical") {
            out_transformed_plot[var] <- out_plot[, var]
          } else {
            out_transformed_plot[var] <- rowSums(out_plot[, mkinmod$map[[var]]])
          }
        }    
        out_transformed_plot <<- out_transformed_plot

        plot(0, type="n", 
          xlim = range(observed$time), ylim = range(observed$value, na.rm=TRUE),
          xlab = "Time", ylab = "Observed")
        col_obs <- pch_obs <- 1:length(obs_vars)
        names(col_obs) <- names(pch_obs) <- obs_vars
        for (obs_var in obs_vars) {
          points(subset(observed, name == obs_var, c(time, value)), 
            pch = pch_obs[obs_var], col = col_obs[obs_var])
        }
        matlines(out_transformed_plot$time, out_transformed_plot[-1])
        legend("topright", inset=c(0.05, 0.05), legend=obs_vars, 
          col=col_obs, pch=pch_obs, lty=1:length(pch_obs))
      }
    
      assign("cost.old", mC$model, inherits=TRUE)
    }
	# HACK to make nls.lm respect the penalty, as it just uses residuals and ignores the cost
	mC$residuals$res <- mC$residuals$res + mC$penalties / length(mC$residuals$res)
	
    return(mC)
  }
  fit <-modFit(cost, c(state.ini.optim, parms.optim), lower = lower, upper = upper, ...)

  # We need to return some more data for summary and plotting
  fit$solution <- solution
  if (solution == "eigen") {
    fit$coefmat <- mkinmod$coefmat
  } 
  if (solution == "deSolve") {
    fit$mkindiff <- mkindiff
  }
  if (plot == TRUE) {
    fit$out_transformed_plot = out_transformed_plot
  }

  # We also need various other information for summary and plotting
  fit$map <- mkinmod$map
  fit$diffs <- mkinmod$diffs
  
    # mkin_long_to_wide does not handle ragged data
#    fit$observed <- mkin_long_to_wide(observed)
    fit$observed <- reshape(observed, direction="wide", timevar="name", idvar="time")
    names(fit$observed) <- c("time", as.vector(unique(observed$name)))
  
  predicted_long <- mkin_wide_to_long(out_predicted, time = "time")
  fit$predicted <- out_predicted

  # Collect initial parameter values in two dataframes
  fit$start <- data.frame(initial = c(state.ini.optim, parms.optim))
  fit$start$type = c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim)))
  fit$start$lower <- lower
  fit$start$upper <- upper

  fit$fixed <- data.frame(
    value = c(state.ini.fixed, parms.fixed))
  fit$fixed$type = c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed)))

#  # Calculate chi2 error levels according to FOCUS (2006)
  fit$errmin <- CakeChi2(observed, predicted_long, obs_vars, parms.optim, state.ini.optim)

  # Calculate dissipation times DT50 and DT90 and formation fractions
  parms.all = c(fit$par, parms.fixed)
  fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), 
    row.names = obs_vars)
  fit$ff <- vector()
  fit$SFORB <- vector()
  for (obs_var in obs_vars) {
    type = names(mkinmod$map[[obs_var]])[1]  

    fit$distimes[obs_var, ] = CakeDT(type,obs_var,parms.all)
  }

  fit$penalties <- CakePenaltiesLong(parms.all, out_predicted, observed)
  
  # Collect observed, predicted and residuals
  data<-observed
  data$err<-rep(NA,length(data$time))
  data <- merge(data, predicted_long, by = c("time", "name"))
  names(data)<-c("time", "variable", "observed","err-var", "predicted")
  data$residual <- data$observed - data$predicted
  data$variable <- ordered(data$variable, levels = obs_vars)
  fit$data <- data[order(data$variable, data$time), ]
  fit$atol <- atol

  class(fit) <- c("CakeFit", "mkinfit", "modFit") 
  return(fit)
}