From 12a31f4c130c551f82232d9ef7dfb608bd52c53f Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Tue, 27 Sep 2016 23:00:48 +0200 Subject: Reorganise repository using standard package layout --- R/TOXSWA_cwa.R | 380 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 380 insertions(+) create mode 100644 R/TOXSWA_cwa.R (limited to 'R/TOXSWA_cwa.R') diff --git a/R/TOXSWA_cwa.R b/R/TOXSWA_cwa.R new file mode 100644 index 0000000..c5ddce9 --- /dev/null +++ b/R/TOXSWA_cwa.R @@ -0,0 +1,380 @@ +# Copyright (C) 2014,2015,2016 Johannes Ranke +# Contact: jranke@uni-bremen.de +# This file is part of the R package pfm + +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. + +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. + +# You should have received a copy of the GNU General Public License along with +# this program. If not, see + +#' Read TOXSWA surface water concentrations +#' +#' Read TOXSWA hourly concentrations of a chemical substance in a specific +#' segment of a TOXSWA surface water body. Per default, the data for the last +#' segment are imported. As TOXSWA 4 reports the values at the end of the hour +#' (ConLiqWatLayCur) in its summary file, we use this value as well instead +#' of the hourly averages (ConLiqWatLay). +#' +#' @param filename The filename of the cwa file (TOXSWA 2.x.y or similar) or the +#' out file (FOCUS TOXSWA 4, i.e. TOXSWA 4.4.2 or similar). +#' @param basedir The path to the directory where the cwa file resides. +#' @param zipfile Optional path to a zip file containing the cwa file. +#' @param segment The segment for which the data should be read. Either "last", or +#' the segment number. +#' @param total Set this to TRUE in order to read total concentrations as well. This is +#' only necessary for .out files as generated by TOXSWA 4.4.2 or similar, not for .cwa +#' files. For .cwa files, the total concentration is always read as well. +#' @param substance For TOXSWA 4 .out files, the default value "parent" leads +#' to reading concentrations of the parent compound. Alternatively, the substance +#' of interested can be selected by its code name. +#' @param windows Numeric vector of width of moving windows in days, for calculating +#' maximum time weighted average concentrations and areas under the curve. +#' @param thresholds Numeric vector of threshold concentrations in µg/L for +#' generating event statistics. +#' @importFrom readr read_fwf fwf_empty +#' @return An instance of an R6 object of class +#' \code{\link{TOXSWA_cwa}}. +#' @export +#' @author Johannes Ranke +#' @examples +#' H_sw_D4_pond <- read.TOXSWA_cwa("00001p_pa.cwa", +#' basedir = "SwashProjects/project_H_sw/TOXSWA", +#' zipfile = system.file("testdata/SwashProjects.zip", +#' package = "pfm")) +read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL, + segment = "last", substance = "parent", + total = FALSE, + windows = NULL, thresholds = NULL) +{ + if (!missing(filename)) { + cwa <- TOXSWA_cwa$new(filename, basedir, zipfile, + substance = substance, total = total) + if (!is.null(windows[1])) cwa$moving_windows(windows) + if (!is.null(thresholds[1])) cwa$get_events(thresholds) + invisible(cwa) + } else { + message("You need to specify a filename for the cwa file to be read") + } +} + +#' Plot TOXSWA surface water concentrations +#' +#' Plot TOXSWA hourly concentrations of a chemical substance in a specific +#' segment of a TOXSWA surface water body. +#' +#' @import graphics +#' @param x The TOXSWA_cwa object to be plotted. +#' @param xlab,ylab Labels for x and y axis. +#' @param time_column What should be used for the time axis. If "t_firstjan" is chosen, +#' the time is given in days relative to the first of January in the first year. +#' @param add Should we add to an existing plot? +#' @param total Should the total concentration in water be plotted, including substance sorbed +#' to suspended matter? +#' @param LC_TIME Specification of the locale used to format dates +#' @param ... Further arguments passed to \code{plot} if we are not adding to an existing plot +#' @export +#' @author Johannes Ranke +#' @examples +#' H_sw_D4_pond <- read.TOXSWA_cwa("00001p_pa.cwa", +#' basedir = "SwashProjects/project_H_sw/TOXSWA", +#' zipfile = system.file("testdata/SwashProjects.zip", +#' package = "pfm")) +#' plot(H_sw_D4_pond) +plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t_rel_to_max"), + xlab = "default", ylab = "default", + add = FALSE, + total = FALSE, LC_TIME = "C", ...) +{ + time_column = match.arg(time_column) + cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L") + lct <- Sys.getlocale("LC_TIME") + tmp <- Sys.setlocale("LC_TIME", LC_TIME) + if (xlab == "default") { + xlab = switch(time_column, + datetime = "Time", + t = "Time [days]", + t_firstjan = "Time since first of January [days]", + t_rel_to_max = "Time relative to maximum concentration [days]") + } + if (ylab == "default") { + ylab = paste( ifelse(total, "Total concentration", "Concentration"), "[\u03bcg/L]") + } + if (add) { + lines(x$cwas[c(time_column, cwa_column)], xlab = xlab, ylab = ylab, ...) + } else{ + if (time_column == "datetime") { + plot(x$cwas$datetime, x$cwas[[cwa_column]], type = "l", + xlab = xlab, ylab = ylab, xaxt = "n", ...) + seq_x <- seq(min(x$cwas$datetime), max(x$cwas$datetime), by = "quarter") + axis(1, at = seq_x, labels = format(seq_x, "%b %Y")) + } else { + plot(x$cwas[c(time_column, cwa_column)], type = "l", + xlab = xlab, ylab = ylab, ...) + } + } + tmp <- Sys.setlocale("LC_TIME", lct) +} + +#' R6 class for holding TOXSWA cwa concentration data and associated statistics +#' +#' An R6 class for holding TOXSWA cwa concentration data and some associated statistics. +#' Usually, an instance of this class will be generated by \code{\link{read.TOXSWA_cwa}}. +#' +#' @docType class +#' @importFrom R6 R6Class +#' @export +#' @format An \code{\link{R6Class}} generator object. +#' @field filename Length one character vector. +#' @field basedir Length one character vector. +#' @field segment Length one integer, specifying for which segment the cwa data were read. +#' @field cwas Dataframe holding the concentrations. +#' @field events List of dataframes holding the event statistics for each threshold. +#' @field windows Matrix of maximum time weighted average concentrations (TWAC_max) +#' and areas under the curve in µg/day * h (AUC_max_h) or µg/day * d (AUC_max_d) +#' for the requested moving window sizes in days. +#' @section Methods: +#' \describe{ +#' \item{\code{get_events(threshold, total = FALSE)}}{ +#' Populate a datataframe with event information for the specified threshold value +#' in µg/L. If \code{total = TRUE}, the total concentration including the amount +#' adsorbed to suspended matter will be used. The resulting dataframe is stored in the +#' \code{events} field of the object. +#' } +#' \item{\code{moving_windows(windows, total = FALSE)}}{ +#' Add to the \code{windows} field described above. +#' Again, if \code{total = TRUE}, the total concentration including the amount +#' adsorbed to suspended matter will be used. +#' } +#' } +#' @examples +#' H_sw_R1_stream <- read.TOXSWA_cwa("00003s_pa.cwa", +#' basedir = "SwashProjects/project_H_sw/TOXSWA", +#' zipfile = system.file("testdata/SwashProjects.zip", +#' package = "pfm")) +#' H_sw_R1_stream$get_events(c(2, 10)) +#' H_sw_R1_stream$moving_windows(c(7, 21)) +#' print(H_sw_R1_stream) +#' @keywords data + +TOXSWA_cwa <- R6Class("TOXSWA_cwa", + public = list( + filename = NULL, + basedir = NULL, + zipfile = NULL, + segment = NULL, + substance = NULL, + cwas = NULL, + windows = NULL, + events = list(), + initialize = function(filename, basedir, zipfile = NULL, + segment = "last", substance = "parent", total = FALSE) { + self$filename <- filename + self$basedir <- basedir + self$zipfile <- zipfile + if (!is.null(zipfile)) { + try(file_connection <- unz(zipfile, paste0(basedir, "/", filename))) + } else { + try(file_connection <- file(file.path(basedir, filename), "rt")) + } + + + if (grepl(".cwa$", filename)) { + # cwa file from FOCUS TOXSWA 3 (TOXSWA 2.x.y) + cwa_all_segments <- try( + read.table(file_connection, + sep = "", skip = 40, + encoding = "UTF-8", + colClasses = c("character", "numeric", + "integer", rep("numeric", 5)), + col.names = c("datetime", "t", "segment", + "xcd", "cwa_tot", "cwa", "Xss", "Xmp"))) + + if (is.null(zipfile)) close(file_connection) # only needed for files + + if (!inherits(cwa_all_segments, "try-error")) { + available_segments = 1:max(cwa_all_segments$segment) + if (segment == "last") segment = max(available_segments) + if (!segment %in% available_segments) stop("Invalid segment specified") + self$segment <- segment + cwa <- subset(cwa_all_segments, segment == self$segment, + c("datetime", "t", "segment", "cwa", "cwa_tot")) + lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C") + cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%H:%M") + Sys.setlocale("LC_TIME", lct) + startyear = format(cwa$datetime[1], "%Y") + firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d") + cwa$t_firstjan <- as.numeric(difftime(cwa$datetime, + firstjan, units = "days")) + + t_max = cwa[which.max(cwa$cwa), "t"] + cwa$t_rel_to_max = cwa$t - t_max + cwa$cwa_mug_per_L <- cwa$cwa * 1000 + cwa$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000 + self$cwas <- cwa[c("datetime", "t", "t_firstjan", + "t_rel_to_max", + "cwa_mug_per_L", + "cwa_tot_mug_per_L")] + } else { + stop("Could not read ", filename) + } + } else { + # out file from FOCUS TOXSWA 4 (TOXSWA 4.4.2 or similar) + outfile <- try(readLines(file_connection)) + + close(file_connection) # only needed for files + + if (inherits(outfile, "try-error")) { + stop("Could not read ", filename) + } else { + # Get the substance name(s) + sub_lines <- grep(".*0.000.*ConLiqWatLayCur_", outfile[1:50], value = TRUE) + substances <- gsub(".*ConLiqWatLayCur_(.*?) *[0-9].*", "\\1", sub_lines) + if (!substance %in% c("parent", substances)) { + stop("No data for substance ", substance, " present in the .out file.") + } + + # Generate field name for the concentrations at end of hour for the + # substance of interest + if (substance == "parent") { + cwa_string = paste0("ConLiqWatLayCur_", substances[1]) + } else { + cwa_string = paste0("ConLiqWatLayCur_", substance) + } + + cwa_lines <- grep(cwa_string, outfile, value = TRUE) + + cwa_all_segments <- read_fwf(paste(cwa_lines, collapse = "\n"), + fwf_empty(paste(tail(cwa_lines), collapse = "\n"))) + + # Append time "-00h00" to datetime in first row, as this is not (always?) present + # in the line ConLiqWatLayCur + if (nchar(cwa_all_segments[1, "X2"]) == 11) { + cwa_all_segments[1, "X2"] = paste0(cwa_all_segments[1, "X2"], "-00h00") + } + + available_segments = 1:(ncol(cwa_all_segments) - 3) + if (segment == "last") segment = max(available_segments) + if (!segment %in% available_segments) stop("Invalid segment specified") + self$segment <- segment + cwa <- data.frame( + datetime = as.character(cwa_all_segments$X2), + t = cwa_all_segments$X1, + cwa = cwa_all_segments[[3 + segment]] + ) + if (total) { + cwa_tot_lines <- outfile[grep("ConSysWatLay_", outfile)] # hourly total conc. + cwa_tot_all_segments <- read.table(text = cwa_lines) + cwa$cwa_tot = cwa_tot_all_segments[[3 + segment]] + } + lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C") + cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%Hh%M") + Sys.setlocale("LC_TIME", lct) + + startyear = format(cwa$datetime[1], "%Y") + firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d") + cwa$t_firstjan <- as.numeric(difftime(cwa$datetime, + firstjan, units = "days")) + t_max = cwa[which.max(cwa$cwa), "t"] + cwa$t_rel_to_max = cwa$t - t_max + cwa$cwa_mug_per_L <- cwa$cwa * 1000 + self$cwas <- cwa[c("datetime", "t", "t_firstjan", + "t_rel_to_max", + "cwa_mug_per_L")] + if (total) { + self$cwas$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000 + } + } + } + }, + moving_windows = function(windows, total = FALSE) { + window_names = paste(windows, "days") + n = length(window_names) + self$windows <- data.frame(window = window_names, + max_TWAC = numeric(n), + max_AUC_h = numeric(n), + max_AUC_d = numeric(n)) + if (missing(windows)) { + stop("You need to specify at least one moving window size in days") + } + cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L") + for (i in seq_along(windows)) { + window_size = windows[i] + filter_size = window_size * 24 + max_TWAC = max(filter(self$cwas[cwa_column], rep(1/filter_size, filter_size), + "convolution"), na.rm = TRUE) + max_AUC_h = max_TWAC * filter_size + max_AUC_d = max_TWAC * window_size + + self$windows[i, -1] = c(max_TWAC, max_AUC_h, max_AUC_d) + } + invisible(self) + }, + get_events = function(thresholds, total = FALSE) { + if (missing(thresholds)) { + stop("You need to specify at least one threshold concentration in \u03bcg/L") + } + for (threshold in thresholds) { + events = data.frame(t_start = numeric(), cwa_max = numeric(), + duration = numeric(), pre_interval = numeric(), + AUC_h = numeric(), AUC_d = numeric()) + cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L") + + event_end = 0 + event = FALSE + event_max = 0 + event_nr = 0 + n_rows = nrow(self$cwas) + for (i in 1:n_rows) { + cwa_cur = self$cwas[i, cwa_column] + if (event == FALSE) { + if (cwa_cur > threshold) { + event_start = self$cwas[i, "t"] + pre_interval = event_start - event_end + i_start = i + event = TRUE + event_max = cwa_cur + } + } else { + if (cwa_cur > event_max) event_max = cwa_cur + if (cwa_cur < threshold || i == n_rows) { + event_nr = event_nr + 1 + i_end = i + if (i == n_rows) i_end = i + event_end = self$cwas[i_end, "t"] + event_length = event_end - event_start + event_cwas <- self$cwas[i_start:(i_end - 1), cwa_column] + event_AUC_h = sum(event_cwas) + event_AUC_d = event_AUC_h / 24 + events[event_nr, ] = c(event_start, event_max, event_length, + pre_interval, event_AUC_h, event_AUC_d) + event = FALSE + } + } + } + + self$events[[as.character(threshold)]] <- events + } + invisible(self) + }, + print = function() { + cat(" data from file", self$filename, "segment", self$segment, "\n") + print(head(self$cwas)) + cat("Moving window analysis\n") + print(self$windows) + for (threshold in names(self$events)) { + cat("Event statistics for threshold", threshold, "\n") + if (nrow(self$events[[threshold]]) == 0) cat("No events found\n") + else print(self$events[[threshold]]) + } + } + ) +) +# vim: set ts=2 sw=2 expandtab: -- cgit v1.2.1