# Copyright (C) 2014,2015  Johannes Ranke
# Contact: jranke@uni-bremen.de
# This file is part of the R package pfm

# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.

# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.

# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>

#' Read TOXSWA surface water concentrations
#'
#' Read TOXSWA hourly concentrations of a chemical substance in a specific
#' segment of a TOXSWA surface water body. Per default, the data for the last
#' segment are imported.
#'
#' @param filename The filename of the cwa file.
#' @param basedir The path to the directory where the cwa file resides.
#' @param zipfile Optional path to a zip file containing the cwa file.
#' @param segment The segment for which the data should be read. Either "last", or 
#'   the segment number.
#' @param windows Numeric vector of width of moving windows in days, for calculating
#'   maximum time weighted average concentrations and areas under the curve.
#' @param thresholds Numeric vector of threshold concentrations in µg/L for
#'   generating event statistics.  
#' @return An instance of an R6 object of class
#' \code{\link{TOXSWA_cwa}}.
#' @export
#' @author Johannes Ranke
#' @examples
#' H_sw_D4_pond  <- read.TOXSWA_cwa("00001p_pa.cwa",
#'                                  basedir = "SwashProjects/project_H_sw/TOXSWA",
#'                                  zipfile = system.file("testdata/SwashProjects.zip",
#'                                              package = "pfm"))
read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL, 
                            segment = "last",
                            windows = NULL, thresholds = NULL)
{
  if (!missing(filename)) {
    cwa <- TOXSWA_cwa$new(filename, basedir, zipfile)
    if (!is.null(windows[1])) cwa$moving_windows(windows)
    if (!is.null(thresholds[1])) cwa$get_events(thresholds)
    invisible(cwa)
  } else {
    message("You need to specify a filename for the cwa file to be read")
  }
}

#' Plot TOXSWA surface water concentrations
#'
#' Plot TOXSWA hourly concentrations of a chemical substance in a specific
#' segment of a segment of a TOXSWA surface water body.
#'
#' @param x The TOXSWA_cwa object to be plotted.
#' @param xlab,ylab Labels for x and y axis.
#' @param time_column What should be used for the time axis. If "t_firstjan" is chosen,
#'   the time is given in days relative to the first of January in the first year.
#' @param add Should we add to an existing plot?
#' @param total Should the total concentration in water be plotted, including substance sorbed
#'   to suspended matter?
#' @param LC_TIME Specification of the locale used to format dates
#' @param ... Further arguments passed to \code{plot} if we are not adding to an existing plot
#' @export
#' @author Johannes Ranke
#' @examples
#' H_sw_D4_pond  <- read.TOXSWA_cwa("00001p_pa.cwa",
#'                                  basedir = "SwashProjects/project_H_sw/TOXSWA",
#'                                  zipfile = system.file("testdata/SwashProjects.zip",
#'                                              package = "pfm"))
#' plot(H_sw_D4_pond)
plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan"),
                            xlab = "default", ylab = "default",
                            add = FALSE,
                            total = FALSE, LC_TIME = "C", ...)
{
  time_column = match.arg(time_column)
  cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")
  lct <- Sys.getlocale("LC_TIME")
  tmp <- Sys.setlocale("LC_TIME", LC_TIME)
  if (xlab == "default") {
    xlab = switch(time_column,
                datetime = "Time",
                t = "Time [days]",
                t_firstjan = "Time since first of January [days]")  
  }
  if (ylab == "default") {
    ylab = paste( ifelse(total, "Total concentration", "Concentration"), "[\u03bcg/L]")
  }
  if (add) {
    lines(x$cwas[c(time_column, cwa_column)], xlab = xlab, ylab = ylab, ...)
  } else{
    plot(x$cwas[c(time_column, cwa_column)], type = "l",
         xlab = xlab, ylab = ylab, ...)
  }
  tmp <- Sys.setlocale("LC_TIME", lct)
}

#' R6 class for holding TOXSWA cwa concentration data and associated statistics
#'
#' An R6 class for holding TOXSWA cwa concentration data and some associated statistics.
#' Usually, an instance of this class will be generated by \code{\link{read.TOXSWA_cwa}}.
#'
#' @docType class
#' @importFrom R6 R6Class
#' @export
#' @format An \code{\link{R6Class}} generator object.
#' @field filename Length one character vector.
#' @field basedir Length one character vector.
#' @field segment Length one integer, specifying for which segment the cwa data were read.
#' @field cwas Dataframe holding the concentrations.
#' @field events List of dataframes holding the event statistics for each threshold.
#' @field windows Matrix of maximum time weighted average concentrations (TWAC_max)
#'   and areas under the curve in µg/day * h (AUC_max_h) or µg/day * d (AUC_max_d) 
#'   for the requested moving window sizes in days.
#' @section Methods:
#' \describe{
#'   \item{\code{get_events(threshold, total = FALSE)}}{
#'         Populate a datataframe with event information for the specified threshold value
#'         in µg/L. If \code{total = TRUE}, the total concentration including the amount
#'         adsorbed to suspended matter will be used. The resulting dataframe is stored in the 
#'         \code{events} field of the object.
#'         }
#'   \item{\code{moving_windows(windows, total = FALSE)}}{
#'         Add to the \code{windows} field described above.
#'         Again, if \code{total = TRUE}, the total concentration including the amount
#'         adsorbed to suspended matter will be used.
#'         }
#' }
#' @examples
#' H_sw_R1_stream  <- read.TOXSWA_cwa("00003s_pa.cwa",
#'                                  basedir = "SwashProjects/project_H_sw/TOXSWA",
#'                                  zipfile = system.file("testdata/SwashProjects.zip",
#'                                              package = "pfm"))
#' H_sw_R1_stream$get_events(c(2, 10))
#' H_sw_R1_stream$moving_windows(c(7, 21))
#' print(H_sw_R1_stream)
#' @keywords data

TOXSWA_cwa <- R6Class("TOXSWA_cwa",
  public = list(
    filename = NULL,
    basedir = NULL,
    zipfile = NULL,
    segment = NULL,
    cwas = NULL,
    windows = NULL,
    events = NULL,
    initialize = function(filename, basedir, zipfile = NULL, segment = "last") {
      self$filename <- filename
      self$basedir <- basedir
      self$zipfile <- zipfile
      if (!is.null(zipfile)) {
        try(file_connection <- unz(zipfile, paste0(basedir, "/", filename)))
      } else {
        try(file_connection <- file(file.path(basedir, filename), "rt"))
      }
      cwa_all_segments <- try(read.table(file_connection,
                                         sep = "", skip = 40, 
                                         colClasses = c("character", "numeric",
                                                        "integer", rep("numeric", 5)),
                                         col.names = c("datetime", "t", "segment",
                                                       "xcd", "cwa_tot", "cwa",
                                                       "Xss", "Xmp")))
      if (is.null(zipfile)) close(file_connection) # only needed for files

      if (!inherits(cwa_all_segments, "try-error")) {
        available_segments = 1:max(cwa_all_segments$segment)
        if (segment == "last") segment = max(available_segments)
        if (!segment %in% available_segments) stop("Invalid segment specified")
        self$segment <- segment
        cwa <- subset(cwa_all_segments, segment == self$segment, 
                      c("datetime", "t", "segment", "cwa", "cwa_tot"))
        lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C")
        cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%H:%M")
        Sys.setlocale("LC_TIME", lct)
        startyear = format(cwa$datetime[1], "%Y") 
        firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d")
        cwa$t_firstjan <- as.numeric(difftime(cwa$datetime, 
                                                    firstjan, units = "days"))

        t_max = cwa[which.max(cwa$cwa), "t"]
        cwa$t_rel_to_max = cwa$t - t_max
        cwa$cwa_mug_per_L <- cwa$cwa * 1000
        cwa$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000
        self$cwas <- cwa[c("datetime", "t", "t_firstjan",
                                        "t_rel_to_max",
                                        "cwa_mug_per_L", 
                                        "cwa_tot_mug_per_L")]
        self$events <- list()
      } else {
        stop("Could not read ", filename)
      }
    },
    moving_windows = function(windows, total = FALSE) {
      window_names = paste(windows, "days")
      n = length(window_names)
      self$windows <- data.frame(window = window_names,
                                 max_TWAC = numeric(n),
                                 max_AUC_h = numeric(n),
                                 max_AUC_d = numeric(n))
      if (missing(windows)) {
        stop("You need to specify at least one moving window size in days")
      }
      cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")
      for (i in seq_along(windows)) {
        window_size = windows[i]
        filter_size = window_size * 24
        max_TWAC = max(filter(self$cwas[cwa_column], rep(1/filter_size, filter_size), 
                            "convolution"), na.rm = TRUE)
        max_AUC_h = max_TWAC * filter_size
        max_AUC_d = max_TWAC * window_size
        
        self$windows[i, -1] = c(max_TWAC, max_AUC_h, max_AUC_d)
      }
      invisible(self)
    },
    get_events = function(thresholds, total = FALSE) {
      if (missing(thresholds)) {
        stop("You need to specify at least one threshold concentration in \u03bcg/L")
      }
      for (threshold in thresholds) {
        events = data.frame(t_start = numeric(), cwa_max = numeric(), 
                            duration = numeric(), pre_interval = numeric(), 
                            AUC_h = numeric(), AUC_d = numeric())
        cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")

        event_end = 0
        event = FALSE
        event_max = 0
        event_nr = 0
        n_rows = nrow(self$cwas)
        for (i in 1:n_rows) {
          cwa_cur = self$cwas[i, cwa_column]
          if (event == FALSE) {
            if (cwa_cur > threshold) {
              event_start = self$cwas[i, "t"]
              pre_interval = event_start - event_end
              i_start = i
              event = TRUE
            }
          } else {
            if (cwa_cur > event_max) event_max = cwa_cur
            if (cwa_cur < threshold || i == n_rows) {
              event_nr = event_nr + 1
              i_end = i
              if (i == n_rows) i_end = i
              event_end = self$cwas[i_end, "t"]
              event_length = event_end - event_start
              event_cwas <- self$cwas[i_start:(i_end - 1), cwa_column]
              event_AUC_h = sum(event_cwas)
              event_AUC_d = event_AUC_h / 24
              events[event_nr, ] = c(event_start, event_max, event_length,
                                     pre_interval, event_AUC_h, event_AUC_d)
              event = FALSE
            } 
          }
        }
        
        self$events[[as.character(threshold)]] <- events
      }
      invisible(self)
    },
    print = function() {
      cat("<TOXSWA_cwa> data from file", self$filename, "segment", self$segment, "\n")
      print(head(self$cwas))
      cat("Moving window analysis\n")
      print(self$windows)
      for (threshold in names(self$events)) {
        cat("Event statistics for threshold", threshold, "\n")
        if (nrow(self$events[[threshold]]) == 0) cat("No events found\n")
        else print(self$events[[threshold]])
      }
    }
  )
)
# vim: set ts=2 sw=2 expandtab: