# Copyright (C) 2014,2015,2016,2017 Johannes Ranke # Contact: jranke@uni-bremen.de # This file is part of the R package pfm # This program is free software: you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation, either version 3 of the License, or (at your option) any later # version. # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more # details. # You should have received a copy of the GNU General Public License along with # this program. If not, see #' Read TOXSWA surface water concentrations #' #' Read TOXSWA hourly concentrations of a chemical substance in a specific #' segment of a TOXSWA surface water body. Per default, the data for the last #' segment are imported. As TOXSWA 4 reports the values at the end of the hour #' (ConLiqWatLayCur) in its summary file, we use this value as well instead #' of the hourly averages (ConLiqWatLay). #' #' @param filename The filename of the cwa file (TOXSWA 2.x.y or similar) or the #' out file (FOCUS TOXSWA 4, i.e. TOXSWA 4.4.2 or similar). #' @param basedir The path to the directory where the cwa file resides. #' @param zipfile Optional path to a zip file containing the cwa file. #' @param segment The segment for which the data should be read. Either "last", or #' the segment number. #' @param total Set this to TRUE in order to read total concentrations as well. This is #' only necessary for .out files as generated by TOXSWA 4.4.2 or similar, not for .cwa #' files. For .cwa files, the total concentration is always read as well. #' @param substance For TOXSWA 4 .out files, the default value "parent" leads #' to reading concentrations of the parent compound. Alternatively, the substance #' of interested can be selected by its code name. #' @param windows Numeric vector of width of moving windows in days, for calculating #' maximum time weighted average concentrations and areas under the curve. #' @param thresholds Numeric vector of threshold concentrations in µg/L for #' generating event statistics. #' @importFrom readr read_fwf fwf_empty #' @return An instance of an R6 object of class #' \code{\link{TOXSWA_cwa}}. #' @export #' @author Johannes Ranke #' @examples #' H_sw_D4_pond <- read.TOXSWA_cwa("00001p_pa.cwa", #' basedir = "SwashProjects/project_H_sw/TOXSWA", #' zipfile = system.file("testdata/SwashProjects.zip", #' package = "pfm")) read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL, segment = "last", substance = "parent", total = FALSE, windows = NULL, thresholds = NULL) { if (!missing(filename)) { cwa <- TOXSWA_cwa$new(filename, basedir, zipfile, substance = substance, total = total) if (!is.null(windows[1])) cwa$moving_windows(windows) if (!is.null(thresholds[1])) cwa$get_events(thresholds) invisible(cwa) } else { message("You need to specify a filename for the cwa file to be read") } } #' Plot TOXSWA surface water concentrations #' #' Plot TOXSWA hourly concentrations of a chemical substance in a specific #' segment of a TOXSWA surface water body. #' #' @import graphics #' @param x The TOXSWA_cwa object to be plotted. #' @param xlab,ylab Labels for x and y axis. #' @param time_column What should be used for the time axis. If "t_firstjan" is chosen, #' the time is given in days relative to the first of January in the first year. #' @param add Should we add to an existing plot? #' @param threshold_factor The factor by which the data have to be lower than the maximum #' in order to get thinned for plotting (see next argument). #' @param thin_low If an integer greater than 1, the data close to zero (smaller than #' 1/threshold_factor of the maximum) in the series will be thinned by this factor #' in order to decrease the amount of data that is included in the plots #' @param total Should the total concentration in water be plotted, including substance sorbed #' to suspended matter? #' @param LC_TIME Specification of the locale used to format dates #' @param ... Further arguments passed to \code{plot} if we are not adding to an existing plot #' @export #' @author Johannes Ranke #' @examples #' H_sw_D4_pond <- read.TOXSWA_cwa("00001p_pa.cwa", #' basedir = "SwashProjects/project_H_sw/TOXSWA", #' zipfile = system.file("testdata/SwashProjects.zip", package = "pfm")) #' plot(H_sw_D4_pond) #' plot(H_sw_D4_pond, time_column = "t") #' plot(H_sw_D4_pond, time_column = "t_firstjan") #' plot(H_sw_D4_pond, time_column = "t_rel_to_max") #' #' H_sw_R1_stream <- read.TOXSWA_cwa("00003s_pa.cwa", #' basedir = "SwashProjects/project_H_sw/TOXSWA", #' zipfile = system.file("testdata/SwashProjects.zip", package = "pfm")) #' plot(H_sw_R1_stream, time_column = "t_rel_to_max") plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t_rel_to_max"), xlab = "default", ylab = "default", add = FALSE, threshold_factor = 1000, thin_low = 1, total = FALSE, LC_TIME = "C", ...) { time_column = match.arg(time_column) cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L") plot_data <- x$cwas[c(time_column, cwa_column)] # Thin out the data that are less than 1/1000 of the maximum names(plot_data) <- c("time", "cwa") threshold <- max(plot_data$cwa) / threshold_factor plot_data_low <- subset(plot_data, cwa < threshold) plot_data_high <- subset(plot_data, cwa >= threshold) plot_data_low_thin <- plot_data_low[seq(from = 1, to = nrow(plot_data_low), by = thin_low), ] plot_data_combined <- rbind(plot_data_low_thin, plot_data_high) plot_data <- plot_data_combined[order(plot_data_combined$time), ] lct <- Sys.getlocale("LC_TIME") tmp <- Sys.setlocale("LC_TIME", LC_TIME) if (identical(xlab, "default")) { xlab = switch(time_column, datetime = "Time", t = "Time [days]", t_firstjan = "Time since first of January [days]", t_rel_to_max = "Time relative to maximum concentration [days]") } if (identical(ylab, "default")) { # Use LateX if the current plotting device is tikz if (names(dev.cur()) == "tikz output") { ylab = paste( ifelse(total, "Total concentration", "Concentration"), "[$\\mu$g/L]") } else { conc_string <- ifelse(total, "Total concentration", "Concentration") ylab = bquote(.(conc_string) ~ group("[", mu*g/L, "]")) } } if (add) { lines(plot_data, xlab = xlab, ylab = ylab, ...) } else{ if (time_column == "datetime") { plot(plot_data, type = "l", xlab = xlab, ylab = ylab, xaxt = "n", ...) seq_x <- seq(min(x$cwas$datetime), max(x$cwas$datetime), by = "quarter") axis(1, at = seq_x, labels = format(seq_x, "%b %Y")) } else { plot(plot_data, type = "l", xlab = xlab, ylab = ylab, ...) } } tmp <- Sys.setlocale("LC_TIME", lct) } #' R6 class for holding TOXSWA cwa concentration data and associated statistics #' #' An R6 class for holding TOXSWA cwa concentration data and some associated statistics. #' Usually, an instance of this class will be generated by \code{\link{read.TOXSWA_cwa}}. #' #' @docType class #' @importFrom R6 R6Class #' @export #' @format An \code{\link{R6Class}} generator object. #' @field filename Length one character vector. #' @field basedir Length one character vector. #' @field segment Length one integer, specifying for which segment the cwa data were read. #' @field cwas Dataframe holding the concentrations. #' @field events List of dataframes holding the event statistics for each threshold. #' @field windows Matrix of maximum time weighted average concentrations (TWAC_max) #' and areas under the curve in µg/day * h (AUC_max_h) or µg/day * d (AUC_max_d) #' for the requested moving window sizes in days. #' @section Methods: #' \describe{ #' \item{\code{get_events(threshold, total = FALSE)}}{ #' Populate a datataframe with event information for the specified threshold value #' in µg/L. If \code{total = TRUE}, the total concentration including the amount #' adsorbed to suspended matter will be used. The resulting dataframe is stored in the #' \code{events} field of the object. #' } #' \item{\code{moving_windows(windows, total = FALSE)}}{ #' Add to the \code{windows} field described above. #' Again, if \code{total = TRUE}, the total concentration including the amount #' adsorbed to suspended matter will be used. #' } #' } #' @examples #' H_sw_R1_stream <- read.TOXSWA_cwa("00003s_pa.cwa", #' basedir = "SwashProjects/project_H_sw/TOXSWA", #' zipfile = system.file("testdata/SwashProjects.zip", #' package = "pfm")) #' H_sw_R1_stream$get_events(c(2, 10)) #' H_sw_R1_stream$moving_windows(c(7, 21)) #' print(H_sw_R1_stream) #' @keywords data TOXSWA_cwa <- R6Class("TOXSWA_cwa", public = list( filename = NULL, basedir = NULL, zipfile = NULL, segment = NULL, substance = NULL, cwas = NULL, windows = NULL, events = list(), initialize = function(filename, basedir, zipfile = NULL, segment = "last", substance = "parent", total = FALSE) { self$filename <- filename self$basedir <- basedir self$zipfile <- zipfile if (!is.null(zipfile)) { try(file_connection <- unz(zipfile, paste0(basedir, "/", filename))) } else { try(file_connection <- file(file.path(basedir, filename), "rt")) } if (grepl(".cwa$", filename)) { # cwa file from FOCUS TOXSWA 3 (TOXSWA 2.x.y) cwa_all_segments <- try( read.table(file_connection, sep = "", skip = 40, encoding = "UTF-8", colClasses = c("character", "numeric", "integer", rep("numeric", 5)), col.names = c("datetime", "t", "segment", "xcd", "cwa_tot", "cwa", "Xss", "Xmp"))) if (is.null(zipfile)) close(file_connection) # only needed for files if (!inherits(cwa_all_segments, "try-error")) { available_segments = 1:max(cwa_all_segments$segment) if (segment == "last") segment = max(available_segments) if (!segment %in% available_segments) stop("Invalid segment specified") self$segment <- segment cwa <- subset(cwa_all_segments, segment == self$segment, c("datetime", "t", "segment", "cwa", "cwa_tot")) lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C") cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%H:%M", tz = "UTC") Sys.setlocale("LC_TIME", lct) startyear = format(cwa$datetime[1], "%Y") firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d", tz = "UTC") cwa$t_firstjan <- as.numeric(difftime(cwa$datetime, firstjan, units = "days")) t_max = cwa[which.max(cwa$cwa), "t"] cwa$t_rel_to_max = cwa$t - t_max cwa$cwa_mug_per_L <- cwa$cwa * 1000 cwa$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000 self$cwas <- cwa[c("datetime", "t", "t_firstjan", "t_rel_to_max", "cwa_mug_per_L", "cwa_tot_mug_per_L")] } else { stop("Could not read ", filename) } } else { # out file from FOCUS TOXSWA 4 (TOXSWA 4.4.2 or similar) outfile <- try(readLines(file_connection)) close(file_connection) # only needed for files if (inherits(outfile, "try-error")) { stop("Could not read ", filename) } else { # Get the substance name(s) sub_lines <- grep(".*0.000.*ConLiqWatLayCur_", outfile[1:50], value = TRUE) substances <- gsub(".*ConLiqWatLayCur_(.*?) +[0-9].*", "\\1", sub_lines) if (!substance %in% c("parent", substances)) { stop("No data for substance ", substance, " present in the .out file.") } # Generate field name for the concentrations at end of hour for the # substance of interest if (substance == "parent") { cwa_string = paste0("ConLiqWatLayCur_", substances[1]) } else { cwa_string = paste0("ConLiqWatLayCur_", substance) } cwa_lines <- grep(cwa_string, outfile, value = TRUE) cwa_all_segments <- read_fwf(paste(cwa_lines, collapse = "\n"), fwf_empty(paste(tail(cwa_lines), collapse = "\n"))) available_segments = 1:(ncol(cwa_all_segments) - 3) if (segment == "last") segment = max(available_segments) if (!segment %in% available_segments) stop("Invalid segment specified") self$segment <- segment cwa <- data.frame( datetime = as.character(cwa_all_segments$X2), t = cwa_all_segments$X1, cwa = cwa_all_segments[[3 + segment]], stringsAsFactors = FALSE ) # Append time "-00h00" to datetime if there is no time (only 11 characters) # The fact that the time is missing at 00h00 was reported to Mark # Liedekerke, Wim Beltman, Paulien Adriaanse, and Chris Lythgo # on 14 December 2016 cwa <- within(cwa, datetime <- ifelse(nchar(datetime) == 11, paste0(datetime, "-00h00"), datetime)) if (total) { cwa_tot_lines <- outfile[grep("ConSysWatLay_", outfile)] # hourly total conc. cwa_tot_all_segments <- read.table(text = cwa_lines) cwa$cwa_tot = cwa_tot_all_segments[[3 + segment]] } lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C") cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%Hh%M", tz = "UTC") Sys.setlocale("LC_TIME", lct) startyear = format(cwa$datetime[1], "%Y") firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d", tz = "UTC") cwa$t_firstjan <- as.numeric(difftime(cwa$datetime, firstjan, units = "days")) t_max = cwa[which.max(cwa$cwa), "t"] cwa$t_rel_to_max = cwa$t - t_max cwa$cwa_mug_per_L <- cwa$cwa * 1000 self$cwas <- cwa[c("datetime", "t", "t_firstjan", "t_rel_to_max", "cwa_mug_per_L")] if (total) { self$cwas$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000 } } } }, moving_windows = function(windows, total = FALSE) { window_names = paste(windows, "days") n = length(window_names) self$windows <- data.frame(window = window_names, max_TWAC = numeric(n), max_AUC_h = numeric(n), max_AUC_d = numeric(n)) if (missing(windows)) { stop("You need to specify at least one moving window size in days") } cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L") for (i in seq_along(windows)) { window_size = windows[i] filter_size = window_size * 24 max_TWAC = max(filter(self$cwas[cwa_column], rep(1/filter_size, filter_size), "convolution"), na.rm = TRUE) max_AUC_h = max_TWAC * filter_size max_AUC_d = max_TWAC * window_size self$windows[i, -1] = c(max_TWAC, max_AUC_h, max_AUC_d) } invisible(self) }, get_events = function(thresholds, total = FALSE) { if (missing(thresholds)) { stop("You need to specify at least one threshold concentration in \u03bcg/L") } for (threshold in thresholds) { events = data.frame(t_start = numeric(), cwa_max = numeric(), duration = numeric(), pre_interval = numeric(), AUC_h = numeric(), AUC_d = numeric()) cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L") event_end = 0 event = FALSE event_max = 0 event_nr = 0 n_rows = nrow(self$cwas) for (i in 1:n_rows) { cwa_cur = self$cwas[i, cwa_column] if (event == FALSE) { if (cwa_cur > threshold) { event_start = self$cwas[i, "t"] pre_interval = event_start - event_end i_start = i event = TRUE event_max = cwa_cur } } else { if (cwa_cur > event_max) event_max = cwa_cur if (cwa_cur < threshold || i == n_rows) { event_nr = event_nr + 1 i_end = i if (i == n_rows) i_end = i event_end = self$cwas[i_end, "t"] event_length = event_end - event_start event_cwas <- self$cwas[i_start:(i_end - 1), cwa_column] event_AUC_h = sum(event_cwas) event_AUC_d = event_AUC_h / 24 events[event_nr, ] = c(event_start, event_max, event_length, pre_interval, event_AUC_h, event_AUC_d) event = FALSE } } } self$events[[as.character(threshold)]] <- events } invisible(self) }, print = function() { cat(" data from file", self$filename, "segment", self$segment, "\n") print(head(self$cwas)) cat("Moving window analysis\n") print(self$windows) for (threshold in names(self$events)) { cat("Event statistics for threshold", threshold, "\n") if (nrow(self$events[[threshold]]) == 0) cat("No events found\n") else print(self$events[[threshold]]) } } ) ) # vim: set ts=2 sw=2 expandtab: