aboutsummaryrefslogtreecommitdiff
path: root/R/TOXSWA_cwa.R
diff options
context:
space:
mode:
authorJohannes Ranke <jranke@uni-bremen.de>2016-09-27 23:00:48 +0200
committerJohannes Ranke <jranke@uni-bremen.de>2016-09-27 23:00:48 +0200
commit12a31f4c130c551f82232d9ef7dfb608bd52c53f (patch)
tree2525ab1ea4102a6edddbd0c2f03f4a851bf2f9c5 /R/TOXSWA_cwa.R
parent0d958ab6f84b569b5437f231c56004890c4ae23b (diff)
Reorganise repository using standard package layout
Diffstat (limited to 'R/TOXSWA_cwa.R')
-rw-r--r--R/TOXSWA_cwa.R380
1 files changed, 380 insertions, 0 deletions
diff --git a/R/TOXSWA_cwa.R b/R/TOXSWA_cwa.R
new file mode 100644
index 0000000..c5ddce9
--- /dev/null
+++ b/R/TOXSWA_cwa.R
@@ -0,0 +1,380 @@
+# Copyright (C) 2014,2015,2016 Johannes Ranke
+# Contact: jranke@uni-bremen.de
+# This file is part of the R package pfm
+
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>
+
+#' Read TOXSWA surface water concentrations
+#'
+#' Read TOXSWA hourly concentrations of a chemical substance in a specific
+#' segment of a TOXSWA surface water body. Per default, the data for the last
+#' segment are imported. As TOXSWA 4 reports the values at the end of the hour
+#' (ConLiqWatLayCur) in its summary file, we use this value as well instead
+#' of the hourly averages (ConLiqWatLay).
+#'
+#' @param filename The filename of the cwa file (TOXSWA 2.x.y or similar) or the
+#' out file (FOCUS TOXSWA 4, i.e. TOXSWA 4.4.2 or similar).
+#' @param basedir The path to the directory where the cwa file resides.
+#' @param zipfile Optional path to a zip file containing the cwa file.
+#' @param segment The segment for which the data should be read. Either "last", or
+#' the segment number.
+#' @param total Set this to TRUE in order to read total concentrations as well. This is
+#' only necessary for .out files as generated by TOXSWA 4.4.2 or similar, not for .cwa
+#' files. For .cwa files, the total concentration is always read as well.
+#' @param substance For TOXSWA 4 .out files, the default value "parent" leads
+#' to reading concentrations of the parent compound. Alternatively, the substance
+#' of interested can be selected by its code name.
+#' @param windows Numeric vector of width of moving windows in days, for calculating
+#' maximum time weighted average concentrations and areas under the curve.
+#' @param thresholds Numeric vector of threshold concentrations in µg/L for
+#' generating event statistics.
+#' @importFrom readr read_fwf fwf_empty
+#' @return An instance of an R6 object of class
+#' \code{\link{TOXSWA_cwa}}.
+#' @export
+#' @author Johannes Ranke
+#' @examples
+#' H_sw_D4_pond <- read.TOXSWA_cwa("00001p_pa.cwa",
+#' basedir = "SwashProjects/project_H_sw/TOXSWA",
+#' zipfile = system.file("testdata/SwashProjects.zip",
+#' package = "pfm"))
+read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL,
+ segment = "last", substance = "parent",
+ total = FALSE,
+ windows = NULL, thresholds = NULL)
+{
+ if (!missing(filename)) {
+ cwa <- TOXSWA_cwa$new(filename, basedir, zipfile,
+ substance = substance, total = total)
+ if (!is.null(windows[1])) cwa$moving_windows(windows)
+ if (!is.null(thresholds[1])) cwa$get_events(thresholds)
+ invisible(cwa)
+ } else {
+ message("You need to specify a filename for the cwa file to be read")
+ }
+}
+
+#' Plot TOXSWA surface water concentrations
+#'
+#' Plot TOXSWA hourly concentrations of a chemical substance in a specific
+#' segment of a TOXSWA surface water body.
+#'
+#' @import graphics
+#' @param x The TOXSWA_cwa object to be plotted.
+#' @param xlab,ylab Labels for x and y axis.
+#' @param time_column What should be used for the time axis. If "t_firstjan" is chosen,
+#' the time is given in days relative to the first of January in the first year.
+#' @param add Should we add to an existing plot?
+#' @param total Should the total concentration in water be plotted, including substance sorbed
+#' to suspended matter?
+#' @param LC_TIME Specification of the locale used to format dates
+#' @param ... Further arguments passed to \code{plot} if we are not adding to an existing plot
+#' @export
+#' @author Johannes Ranke
+#' @examples
+#' H_sw_D4_pond <- read.TOXSWA_cwa("00001p_pa.cwa",
+#' basedir = "SwashProjects/project_H_sw/TOXSWA",
+#' zipfile = system.file("testdata/SwashProjects.zip",
+#' package = "pfm"))
+#' plot(H_sw_D4_pond)
+plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t_rel_to_max"),
+ xlab = "default", ylab = "default",
+ add = FALSE,
+ total = FALSE, LC_TIME = "C", ...)
+{
+ time_column = match.arg(time_column)
+ cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")
+ lct <- Sys.getlocale("LC_TIME")
+ tmp <- Sys.setlocale("LC_TIME", LC_TIME)
+ if (xlab == "default") {
+ xlab = switch(time_column,
+ datetime = "Time",
+ t = "Time [days]",
+ t_firstjan = "Time since first of January [days]",
+ t_rel_to_max = "Time relative to maximum concentration [days]")
+ }
+ if (ylab == "default") {
+ ylab = paste( ifelse(total, "Total concentration", "Concentration"), "[\u03bcg/L]")
+ }
+ if (add) {
+ lines(x$cwas[c(time_column, cwa_column)], xlab = xlab, ylab = ylab, ...)
+ } else{
+ if (time_column == "datetime") {
+ plot(x$cwas$datetime, x$cwas[[cwa_column]], type = "l",
+ xlab = xlab, ylab = ylab, xaxt = "n", ...)
+ seq_x <- seq(min(x$cwas$datetime), max(x$cwas$datetime), by = "quarter")
+ axis(1, at = seq_x, labels = format(seq_x, "%b %Y"))
+ } else {
+ plot(x$cwas[c(time_column, cwa_column)], type = "l",
+ xlab = xlab, ylab = ylab, ...)
+ }
+ }
+ tmp <- Sys.setlocale("LC_TIME", lct)
+}
+
+#' R6 class for holding TOXSWA cwa concentration data and associated statistics
+#'
+#' An R6 class for holding TOXSWA cwa concentration data and some associated statistics.
+#' Usually, an instance of this class will be generated by \code{\link{read.TOXSWA_cwa}}.
+#'
+#' @docType class
+#' @importFrom R6 R6Class
+#' @export
+#' @format An \code{\link{R6Class}} generator object.
+#' @field filename Length one character vector.
+#' @field basedir Length one character vector.
+#' @field segment Length one integer, specifying for which segment the cwa data were read.
+#' @field cwas Dataframe holding the concentrations.
+#' @field events List of dataframes holding the event statistics for each threshold.
+#' @field windows Matrix of maximum time weighted average concentrations (TWAC_max)
+#' and areas under the curve in µg/day * h (AUC_max_h) or µg/day * d (AUC_max_d)
+#' for the requested moving window sizes in days.
+#' @section Methods:
+#' \describe{
+#' \item{\code{get_events(threshold, total = FALSE)}}{
+#' Populate a datataframe with event information for the specified threshold value
+#' in µg/L. If \code{total = TRUE}, the total concentration including the amount
+#' adsorbed to suspended matter will be used. The resulting dataframe is stored in the
+#' \code{events} field of the object.
+#' }
+#' \item{\code{moving_windows(windows, total = FALSE)}}{
+#' Add to the \code{windows} field described above.
+#' Again, if \code{total = TRUE}, the total concentration including the amount
+#' adsorbed to suspended matter will be used.
+#' }
+#' }
+#' @examples
+#' H_sw_R1_stream <- read.TOXSWA_cwa("00003s_pa.cwa",
+#' basedir = "SwashProjects/project_H_sw/TOXSWA",
+#' zipfile = system.file("testdata/SwashProjects.zip",
+#' package = "pfm"))
+#' H_sw_R1_stream$get_events(c(2, 10))
+#' H_sw_R1_stream$moving_windows(c(7, 21))
+#' print(H_sw_R1_stream)
+#' @keywords data
+
+TOXSWA_cwa <- R6Class("TOXSWA_cwa",
+ public = list(
+ filename = NULL,
+ basedir = NULL,
+ zipfile = NULL,
+ segment = NULL,
+ substance = NULL,
+ cwas = NULL,
+ windows = NULL,
+ events = list(),
+ initialize = function(filename, basedir, zipfile = NULL,
+ segment = "last", substance = "parent", total = FALSE) {
+ self$filename <- filename
+ self$basedir <- basedir
+ self$zipfile <- zipfile
+ if (!is.null(zipfile)) {
+ try(file_connection <- unz(zipfile, paste0(basedir, "/", filename)))
+ } else {
+ try(file_connection <- file(file.path(basedir, filename), "rt"))
+ }
+
+
+ if (grepl(".cwa$", filename)) {
+ # cwa file from FOCUS TOXSWA 3 (TOXSWA 2.x.y)
+ cwa_all_segments <- try(
+ read.table(file_connection,
+ sep = "", skip = 40,
+ encoding = "UTF-8",
+ colClasses = c("character", "numeric",
+ "integer", rep("numeric", 5)),
+ col.names = c("datetime", "t", "segment",
+ "xcd", "cwa_tot", "cwa", "Xss", "Xmp")))
+
+ if (is.null(zipfile)) close(file_connection) # only needed for files
+
+ if (!inherits(cwa_all_segments, "try-error")) {
+ available_segments = 1:max(cwa_all_segments$segment)
+ if (segment == "last") segment = max(available_segments)
+ if (!segment %in% available_segments) stop("Invalid segment specified")
+ self$segment <- segment
+ cwa <- subset(cwa_all_segments, segment == self$segment,
+ c("datetime", "t", "segment", "cwa", "cwa_tot"))
+ lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C")
+ cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%H:%M")
+ Sys.setlocale("LC_TIME", lct)
+ startyear = format(cwa$datetime[1], "%Y")
+ firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d")
+ cwa$t_firstjan <- as.numeric(difftime(cwa$datetime,
+ firstjan, units = "days"))
+
+ t_max = cwa[which.max(cwa$cwa), "t"]
+ cwa$t_rel_to_max = cwa$t - t_max
+ cwa$cwa_mug_per_L <- cwa$cwa * 1000
+ cwa$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000
+ self$cwas <- cwa[c("datetime", "t", "t_firstjan",
+ "t_rel_to_max",
+ "cwa_mug_per_L",
+ "cwa_tot_mug_per_L")]
+ } else {
+ stop("Could not read ", filename)
+ }
+ } else {
+ # out file from FOCUS TOXSWA 4 (TOXSWA 4.4.2 or similar)
+ outfile <- try(readLines(file_connection))
+
+ close(file_connection) # only needed for files
+
+ if (inherits(outfile, "try-error")) {
+ stop("Could not read ", filename)
+ } else {
+ # Get the substance name(s)
+ sub_lines <- grep(".*0.000.*ConLiqWatLayCur_", outfile[1:50], value = TRUE)
+ substances <- gsub(".*ConLiqWatLayCur_(.*?) *[0-9].*", "\\1", sub_lines)
+ if (!substance %in% c("parent", substances)) {
+ stop("No data for substance ", substance, " present in the .out file.")
+ }
+
+ # Generate field name for the concentrations at end of hour for the
+ # substance of interest
+ if (substance == "parent") {
+ cwa_string = paste0("ConLiqWatLayCur_", substances[1])
+ } else {
+ cwa_string = paste0("ConLiqWatLayCur_", substance)
+ }
+
+ cwa_lines <- grep(cwa_string, outfile, value = TRUE)
+
+ cwa_all_segments <- read_fwf(paste(cwa_lines, collapse = "\n"),
+ fwf_empty(paste(tail(cwa_lines), collapse = "\n")))
+
+ # Append time "-00h00" to datetime in first row, as this is not (always?) present
+ # in the line ConLiqWatLayCur
+ if (nchar(cwa_all_segments[1, "X2"]) == 11) {
+ cwa_all_segments[1, "X2"] = paste0(cwa_all_segments[1, "X2"], "-00h00")
+ }
+
+ available_segments = 1:(ncol(cwa_all_segments) - 3)
+ if (segment == "last") segment = max(available_segments)
+ if (!segment %in% available_segments) stop("Invalid segment specified")
+ self$segment <- segment
+ cwa <- data.frame(
+ datetime = as.character(cwa_all_segments$X2),
+ t = cwa_all_segments$X1,
+ cwa = cwa_all_segments[[3 + segment]]
+ )
+ if (total) {
+ cwa_tot_lines <- outfile[grep("ConSysWatLay_", outfile)] # hourly total conc.
+ cwa_tot_all_segments <- read.table(text = cwa_lines)
+ cwa$cwa_tot = cwa_tot_all_segments[[3 + segment]]
+ }
+ lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C")
+ cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%Hh%M")
+ Sys.setlocale("LC_TIME", lct)
+
+ startyear = format(cwa$datetime[1], "%Y")
+ firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d")
+ cwa$t_firstjan <- as.numeric(difftime(cwa$datetime,
+ firstjan, units = "days"))
+ t_max = cwa[which.max(cwa$cwa), "t"]
+ cwa$t_rel_to_max = cwa$t - t_max
+ cwa$cwa_mug_per_L <- cwa$cwa * 1000
+ self$cwas <- cwa[c("datetime", "t", "t_firstjan",
+ "t_rel_to_max",
+ "cwa_mug_per_L")]
+ if (total) {
+ self$cwas$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000
+ }
+ }
+ }
+ },
+ moving_windows = function(windows, total = FALSE) {
+ window_names = paste(windows, "days")
+ n = length(window_names)
+ self$windows <- data.frame(window = window_names,
+ max_TWAC = numeric(n),
+ max_AUC_h = numeric(n),
+ max_AUC_d = numeric(n))
+ if (missing(windows)) {
+ stop("You need to specify at least one moving window size in days")
+ }
+ cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")
+ for (i in seq_along(windows)) {
+ window_size = windows[i]
+ filter_size = window_size * 24
+ max_TWAC = max(filter(self$cwas[cwa_column], rep(1/filter_size, filter_size),
+ "convolution"), na.rm = TRUE)
+ max_AUC_h = max_TWAC * filter_size
+ max_AUC_d = max_TWAC * window_size
+
+ self$windows[i, -1] = c(max_TWAC, max_AUC_h, max_AUC_d)
+ }
+ invisible(self)
+ },
+ get_events = function(thresholds, total = FALSE) {
+ if (missing(thresholds)) {
+ stop("You need to specify at least one threshold concentration in \u03bcg/L")
+ }
+ for (threshold in thresholds) {
+ events = data.frame(t_start = numeric(), cwa_max = numeric(),
+ duration = numeric(), pre_interval = numeric(),
+ AUC_h = numeric(), AUC_d = numeric())
+ cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")
+
+ event_end = 0
+ event = FALSE
+ event_max = 0
+ event_nr = 0
+ n_rows = nrow(self$cwas)
+ for (i in 1:n_rows) {
+ cwa_cur = self$cwas[i, cwa_column]
+ if (event == FALSE) {
+ if (cwa_cur > threshold) {
+ event_start = self$cwas[i, "t"]
+ pre_interval = event_start - event_end
+ i_start = i
+ event = TRUE
+ event_max = cwa_cur
+ }
+ } else {
+ if (cwa_cur > event_max) event_max = cwa_cur
+ if (cwa_cur < threshold || i == n_rows) {
+ event_nr = event_nr + 1
+ i_end = i
+ if (i == n_rows) i_end = i
+ event_end = self$cwas[i_end, "t"]
+ event_length = event_end - event_start
+ event_cwas <- self$cwas[i_start:(i_end - 1), cwa_column]
+ event_AUC_h = sum(event_cwas)
+ event_AUC_d = event_AUC_h / 24
+ events[event_nr, ] = c(event_start, event_max, event_length,
+ pre_interval, event_AUC_h, event_AUC_d)
+ event = FALSE
+ }
+ }
+ }
+
+ self$events[[as.character(threshold)]] <- events
+ }
+ invisible(self)
+ },
+ print = function() {
+ cat("<TOXSWA_cwa> data from file", self$filename, "segment", self$segment, "\n")
+ print(head(self$cwas))
+ cat("Moving window analysis\n")
+ print(self$windows)
+ for (threshold in names(self$events)) {
+ cat("Event statistics for threshold", threshold, "\n")
+ if (nrow(self$events[[threshold]]) == 0) cat("No events found\n")
+ else print(self$events[[threshold]])
+ }
+ }
+ )
+)
+# vim: set ts=2 sw=2 expandtab:

Contact - Imprint