# Copyright (C) 2014,2015,2016,2017,2018 Johannes Ranke
# Contact: jranke@uni-bremen.de
# This file is part of the R package pfm
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
# You should have received a copy of the GNU General Public License along with
# this program. If not, see
if(getRversion() >= '2.15.1') utils::globalVariables(c("cwa"))
#' Read TOXSWA surface water concentrations
#'
#' Read TOXSWA hourly concentrations of a chemical substance in a specific
#' segment of a TOXSWA surface water body. Per default, the data for the last
#' segment are imported. As TOXSWA 4 reports the values at the end of the hour
#' (ConLiqWatLayCur) in its summary file, we use this value as well instead
#' of the hourly averages (ConLiqWatLay). In TOXSWA 5.5.3 this variable was
#' renamed to ConLiqWatLay in the out file.
#'
#' @param filename The filename of the cwa file (TOXSWA 2.x.y or similar) or the
#' out file when using FOCUS TOXSWA 4 (i.e. TOXSWA 4.4.2) or higher.
#' @param basedir The path to the directory where the cwa file resides.
#' @param zipfile Optional path to a zip file containing the cwa file.
#' @param segment The segment for which the data should be read. Either "last", or
#' the segment number.
#' @param total Set this to TRUE in order to read total concentrations as well. This is
#' only necessary for .out files as generated by TOXSWA 4.4.2 or similar, not for .cwa
#' files. For .cwa files, the total concentration is always read as well.
#' @param substance For .out files, the default value "parent" leads
#' to reading concentrations of the parent compound. Alternatively, the substance
#' of interested can be selected by its code name.
#' @param windows Numeric vector of width of moving windows in days, for calculating
#' maximum time weighted average concentrations and areas under the curve.
#' @param thresholds Numeric vector of threshold concentrations in µg/L for
#' generating event statistics.
#' @importFrom readr read_fwf fwf_empty
#' @importFrom grDevices dev.cur
#' @return An instance of an R6 object of class
#' \code{\link{TOXSWA_cwa}}.
#' @export
#' @author Johannes Ranke
#' @examples
#' H_sw_D4_pond <- read.TOXSWA_cwa("00001p_pa.cwa",
#' basedir = "SwashProjects/project_H_sw/TOXSWA",
#' zipfile = system.file("testdata/SwashProjects.zip",
#' package = "pfm"))
read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL,
segment = "last", substance = "parent",
total = FALSE,
windows = NULL, thresholds = NULL)
{
if (!missing(filename)) {
cwa <- TOXSWA_cwa$new(filename, basedir, zipfile,
substance = substance, total = total)
if (!is.null(windows[1])) cwa$moving_windows(windows)
if (!is.null(thresholds[1])) cwa$get_events(thresholds)
invisible(cwa)
} else {
message("You need to specify a filename for the cwa file to be read")
}
}
#' Plot TOXSWA surface water concentrations
#'
#' Plot TOXSWA hourly concentrations of a chemical substance in a specific
#' segment of a TOXSWA surface water body.
#'
#' @import graphics
#' @param x The TOXSWA_cwa object to be plotted.
#' @param xlab,ylab Labels for x and y axis.
#' @param time_column What should be used for the time axis. If "t_firstjan" is chosen,
#' the time is given in days relative to the first of January in the first year.
#' @param add Should we add to an existing plot?
#' @param threshold_factor The factor by which the data have to be lower than the maximum
#' in order to get thinned for plotting (see next argument).
#' @param thin_low If an integer greater than 1, the data close to zero (smaller than
#' 1/threshold_factor of the maximum) in the series will be thinned by this factor
#' in order to decrease the amount of data that is included in the plots
#' @param total Should the total concentration in water be plotted, including substance sorbed
#' to suspended matter?
#' @param LC_TIME Specification of the locale used to format dates
#' @param ... Further arguments passed to \code{plot} if we are not adding to an existing plot
#' @export
#' @author Johannes Ranke
#' @examples
#' H_sw_D4_pond <- read.TOXSWA_cwa("00001p_pa.cwa",
#' basedir = "SwashProjects/project_H_sw/TOXSWA",
#' zipfile = system.file("testdata/SwashProjects.zip", package = "pfm"))
#' plot(H_sw_D4_pond)
#' plot(H_sw_D4_pond, time_column = "t")
#' plot(H_sw_D4_pond, time_column = "t_firstjan")
#' plot(H_sw_D4_pond, time_column = "t_rel_to_max")
#'
#' H_sw_R1_stream <- read.TOXSWA_cwa("00003s_pa.cwa",
#' basedir = "SwashProjects/project_H_sw/TOXSWA",
#' zipfile = system.file("testdata/SwashProjects.zip", package = "pfm"))
#' plot(H_sw_R1_stream, time_column = "t_rel_to_max")
plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t_rel_to_max"),
xlab = "default", ylab = "default",
add = FALSE,
threshold_factor = 1000, thin_low = 1,
total = FALSE, LC_TIME = "C", ...)
{
time_column = match.arg(time_column)
cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")
plot_data <- x$cwas[c(time_column, cwa_column)]
# Thin out the data that are less than 1/1000 of the maximum
names(plot_data) <- c("time", "cwa")
threshold <- max(plot_data$cwa) / threshold_factor
plot_data_low <- subset(plot_data, cwa < threshold)
plot_data_high <- subset(plot_data, cwa >= threshold)
plot_data_low_thin <- plot_data_low[seq(from = 1, to = nrow(plot_data_low), by = thin_low), ]
plot_data_combined <- rbind(plot_data_low_thin, plot_data_high)
plot_data <- plot_data_combined[order(plot_data_combined$time), ]
lct <- Sys.getlocale("LC_TIME")
tmp <- Sys.setlocale("LC_TIME", LC_TIME)
if (identical(xlab, "default")) {
xlab = switch(time_column,
datetime = "Time",
t = "Time [days]",
t_firstjan = "Time since first of January [days]",
t_rel_to_max = "Time relative to maximum concentration [days]")
}
if (identical(ylab, "default")) {
# Use LateX if the current plotting device is tikz
if (names(dev.cur()) == "tikz output") {
ylab = paste( ifelse(total, "Total concentration", "Concentration"), "[$\\mu$g/L]")
} else {
conc_string <- ifelse(total, "Total concentration", "Concentration")
ylab = bquote(.(conc_string) ~ group("[", mu*g/L, "]"))
}
}
if (add) {
lines(plot_data, xlab = xlab, ylab = ylab, ...)
} else{
if (time_column == "datetime") {
plot(plot_data, type = "l",
xlab = xlab, ylab = ylab, xaxt = "n", ...)
seq_x <- seq(min(x$cwas$datetime), max(x$cwas$datetime), by = "quarter")
axis(1, at = seq_x, labels = format(seq_x, "%b %Y"))
} else {
plot(plot_data, type = "l",
xlab = xlab, ylab = ylab, ...)
}
}
tmp <- Sys.setlocale("LC_TIME", lct)
}
#' R6 class for holding TOXSWA water concentration data and associated statistics
#'
#' An R6 class for holding TOXSWA water concentration (cwa) data and some associated statistics.
#' Usually, an instance of this class will be generated by \code{\link{read.TOXSWA_cwa}}.
#'
#' @docType class
#' @importFrom R6 R6Class
#' @export
#' @format An \code{\link{R6Class}} generator object.
#' @field filename Length one character vector.
#' @field basedir Length one character vector.
#' @field segment Length one integer, specifying for which segment the cwa data were read.
#' @field cwas Dataframe holding the concentrations.
#' @field events List of dataframes holding the event statistics for each threshold.
#' @field windows Matrix of maximum time weighted average concentrations (TWAC_max)
#' and areas under the curve in µg/day * h (AUC_max_h) or µg/day * d (AUC_max_d)
#' for the requested moving window sizes in days.
#' @section Methods:
#' \describe{
#' \item{\code{get_events(threshold, total = FALSE)}}{
#' Populate a datataframe with event information for the specified threshold value
#' in µg/L. If \code{total = TRUE}, the total concentration including the amount
#' adsorbed to suspended matter will be used. The resulting dataframe is stored in the
#' \code{events} field of the object.
#' }
#' \item{\code{moving_windows(windows, total = FALSE)}}{
#' Add to the \code{windows} field described above.
#' Again, if \code{total = TRUE}, the total concentration including the amount
#' adsorbed to suspended matter will be used.
#' }
#' }
#' @examples
#' H_sw_R1_stream <- read.TOXSWA_cwa("00003s_pa.cwa",
#' basedir = "SwashProjects/project_H_sw/TOXSWA",
#' zipfile = system.file("testdata/SwashProjects.zip",
#' package = "pfm"))
#' H_sw_R1_stream$get_events(c(2, 10))
#' H_sw_R1_stream$moving_windows(c(7, 21))
#' print(H_sw_R1_stream)
#' @keywords data
TOXSWA_cwa <- R6Class("TOXSWA_cwa",
public = list(
filename = NULL,
basedir = NULL,
zipfile = NULL,
segment = NULL,
substance = NULL,
cwas = NULL,
windows = NULL,
events = list(),
initialize = function(filename, basedir, zipfile = NULL,
segment = "last", substance = "parent", total = FALSE) {
self$filename <- filename
self$basedir <- basedir
self$zipfile <- zipfile
if (!is.null(zipfile)) {
try(file_connection <- unz(zipfile, paste0(basedir, "/", filename)))
} else {
try(file_connection <- file(file.path(basedir, filename), "rt"))
}
if (grepl(".cwa$", filename)) {
# cwa file from FOCUS TOXSWA 3 (TOXSWA 2.x.y)
cwa_all_segments <- try(
read.table(file_connection,
sep = "", skip = 40,
encoding = "UTF-8",
colClasses = c("character", "numeric",
"integer", rep("numeric", 5)),
col.names = c("datetime", "t", "segment",
"xcd", "cwa_tot", "cwa", "Xss", "Xmp")))
if (is.null(zipfile)) close(file_connection) # only needed for files
if (!inherits(cwa_all_segments, "try-error")) {
available_segments = 1:max(cwa_all_segments$segment)
if (segment == "last") segment = max(available_segments)
if (!segment %in% available_segments) stop("Invalid segment specified")
self$segment <- segment
cwa <- subset(cwa_all_segments, segment == self$segment,
c("datetime", "t", "segment", "cwa", "cwa_tot"))
lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C")
cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%H:%M", tz = "UTC")
Sys.setlocale("LC_TIME", lct)
startyear = format(cwa$datetime[1], "%Y")
firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d",
tz = "UTC")
cwa$t_firstjan <- as.numeric(difftime(cwa$datetime,
firstjan, units = "days"))
t_max = cwa[which.max(cwa$cwa), "t"]
cwa$t_rel_to_max = cwa$t - t_max
cwa$cwa_mug_per_L <- cwa$cwa * 1000
cwa$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000
self$cwas <- cwa[c("datetime", "t", "t_firstjan",
"t_rel_to_max",
"cwa_mug_per_L",
"cwa_tot_mug_per_L")]
} else {
stop("Could not read ", filename)
}
} else {
# out file from FOCUS TOXSWA 4 or higher
outfile <- try(readLines(file_connection))
focus_toxswa_version <- gsub(".*: ", "", outfile[4])
if (substr(focus_toxswa_version, 1, 1) == "3") {
cwastring = "ConLiqWatLayCur"
} else {
cwastring = "ConLiqWatLay"
}
close(file_connection) # only needed for files
if (inherits(outfile, "try-error")) {
stop("Could not read ", filename)
} else {
# Get the substance name(s)
sub_lines <- grep(paste0(".*0.000.*", cwastring, "_"), outfile[1:50], value = TRUE)
substances <- gsub(paste0(".*", cwastring, "_(.*?) +[0-9].*"), "\\1", sub_lines)
if (!substance %in% c("parent", substances)) {
stop("No data for substance ", substance, " present in the .out file.")
}
# Generate field name for the concentrations at end of hour for the
# substance of interest
if (substance == "parent") {
cwa_string = paste0(cwastring, "_", substances[1])
} else {
cwa_string = paste0(cwastring, "_", substance)
}
cwa_lines <- grep(cwa_string, outfile, value = TRUE)
cwa_all_segments <- read_fwf(paste(cwa_lines, collapse = "\n"),
fwf_empty(paste(tail(cwa_lines), collapse = "\n")))
available_segments = 1:(ncol(cwa_all_segments) - 3)
if (segment == "last") segment = max(available_segments)
if (!segment %in% available_segments) stop("Invalid segment specified")
self$segment <- segment
cwa <- data.frame(
datetime = as.character(cwa_all_segments$X2),
t = cwa_all_segments$X1,
cwa = cwa_all_segments[[3 + segment]],
stringsAsFactors = FALSE
)
# Append time "-00h00" to datetime if there is no time (only 11 characters)
# The fact that the time is missing at 00h00 was reported to Mark
# Liedekerke, Wim Beltman, Paulien Adriaanse, and Chris Lythgo
# on 14 December 2016
cwa <- within(cwa,
datetime <- ifelse(nchar(datetime) == 11,
paste0(datetime, "-00h00"),
datetime))
if (total) {
cwa_tot_lines <- outfile[grep("ConSysWatLay_", outfile)] # hourly total conc.
cwa_tot_all_segments <- read.table(text = cwa_lines)
cwa$cwa_tot = cwa_tot_all_segments[[3 + segment]]
}
lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C")
cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%Hh%M", tz = "UTC")
Sys.setlocale("LC_TIME", lct)
startyear = format(cwa$datetime[1], "%Y")
firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d",
tz = "UTC")
cwa$t_firstjan <- as.numeric(difftime(cwa$datetime,
firstjan, units = "days"))
t_max = cwa[which.max(cwa$cwa), "t"]
cwa$t_rel_to_max = cwa$t - t_max
cwa$cwa_mug_per_L <- cwa$cwa * 1000
self$cwas <- cwa[c("datetime", "t", "t_firstjan",
"t_rel_to_max",
"cwa_mug_per_L")]
if (total) {
self$cwas$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000
}
}
}
},
moving_windows = function(windows, total = FALSE) {
window_names = paste(windows, "days")
n = length(window_names)
self$windows <- data.frame(window = window_names,
max_TWAC = numeric(n),
max_AUC_h = numeric(n),
max_AUC_d = numeric(n))
if (missing(windows)) {
stop("You need to specify at least one moving window size in days")
}
cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")
for (i in seq_along(windows)) {
window_size = windows[i]
filter_size = window_size * 24
max_TWAC = max(filter(self$cwas[cwa_column], rep(1/filter_size, filter_size),
"convolution"), na.rm = TRUE)
max_AUC_h = max_TWAC * filter_size
max_AUC_d = max_TWAC * window_size
self$windows[i, -1] = c(max_TWAC, max_AUC_h, max_AUC_d)
}
invisible(self)
},
get_events = function(thresholds, total = FALSE) {
if (missing(thresholds)) {
stop("You need to specify at least one threshold concentration in \u03bcg/L")
}
for (threshold in thresholds) {
events = data.frame(t_start = numeric(), cwa_max = numeric(),
duration = numeric(), pre_interval = numeric(),
AUC_h = numeric(), AUC_d = numeric())
cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")
event_end = 0
event = FALSE
event_max = 0
event_nr = 0
n_rows = nrow(self$cwas)
for (i in 1:n_rows) {
cwa_cur = self$cwas[i, cwa_column]
if (event == FALSE) {
if (cwa_cur > threshold) {
event_start = self$cwas[i, "t"]
pre_interval = event_start - event_end
i_start = i
event = TRUE
event_max = cwa_cur
}
} else {
if (cwa_cur > event_max) event_max = cwa_cur
if (cwa_cur < threshold || i == n_rows) {
event_nr = event_nr + 1
i_end = i
if (i == n_rows) i_end = i
event_end = self$cwas[i_end, "t"]
event_length = event_end - event_start
event_cwas <- self$cwas[i_start:(i_end - 1), cwa_column]
event_AUC_h = sum(event_cwas)
event_AUC_d = event_AUC_h / 24
events[event_nr, ] = c(event_start, event_max, event_length,
pre_interval, event_AUC_h, event_AUC_d)
event = FALSE
}
}
}
self$events[[as.character(threshold)]] <- events
}
invisible(self)
},
print = function() {
cat(" data from file", self$filename, "segment", self$segment, "\n")
print(head(self$cwas))
cat("Moving window analysis\n")
print(self$windows)
for (threshold in names(self$events)) {
cat("Event statistics for threshold", threshold, "\n")
if (nrow(self$events[[threshold]]) == 0) cat("No events found\n")
else print(self$events[[threshold]])
}
}
)
)
# vim: set ts=2 sw=2 expandtab: