aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
authorJohannes Ranke <jranke@uni-bremen.de>2017-11-04 11:48:08 +0100
committerJohannes Ranke <jranke@uni-bremen.de>2017-11-04 11:48:08 +0100
commit0e1d517d4d6351f2d43ab8636363e73d8b8cf677 (patch)
tree6f5dedbb8ebd26534667aa2b2bd13b6a6ba6490f /R
parenteb4465035dd44907eae3ea0340221316b7cfca12 (diff)
Option to thin low TOXSWA PECsw data for plotting
to reduce the file size of plots e.g. as PDF files
Diffstat (limited to 'R')
-rw-r--r--R/TOXSWA_cwa.R96
1 files changed, 60 insertions, 36 deletions
diff --git a/R/TOXSWA_cwa.R b/R/TOXSWA_cwa.R
index 2985e1f..3d52f31 100644
--- a/R/TOXSWA_cwa.R
+++ b/R/TOXSWA_cwa.R
@@ -27,9 +27,9 @@
#' out file (FOCUS TOXSWA 4, i.e. TOXSWA 4.4.2 or similar).
#' @param basedir The path to the directory where the cwa file resides.
#' @param zipfile Optional path to a zip file containing the cwa file.
-#' @param segment The segment for which the data should be read. Either "last", or
+#' @param segment The segment for which the data should be read. Either "last", or
#' the segment number.
-#' @param total Set this to TRUE in order to read total concentrations as well. This is
+#' @param total Set this to TRUE in order to read total concentrations as well. This is
#' only necessary for .out files as generated by TOXSWA 4.4.2 or similar, not for .cwa
#' files. For .cwa files, the total concentration is always read as well.
#' @param substance For TOXSWA 4 .out files, the default value "parent" leads
@@ -38,7 +38,7 @@
#' @param windows Numeric vector of width of moving windows in days, for calculating
#' maximum time weighted average concentrations and areas under the curve.
#' @param thresholds Numeric vector of threshold concentrations in µg/L for
-#' generating event statistics.
+#' generating event statistics.
#' @importFrom readr read_fwf fwf_empty
#' @return An instance of an R6 object of class
#' \code{\link{TOXSWA_cwa}}.
@@ -49,13 +49,13 @@
#' basedir = "SwashProjects/project_H_sw/TOXSWA",
#' zipfile = system.file("testdata/SwashProjects.zip",
#' package = "pfm"))
-read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL,
+read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL,
segment = "last", substance = "parent",
total = FALSE,
windows = NULL, thresholds = NULL)
{
if (!missing(filename)) {
- cwa <- TOXSWA_cwa$new(filename, basedir, zipfile,
+ cwa <- TOXSWA_cwa$new(filename, basedir, zipfile,
substance = substance, total = total)
if (!is.null(windows[1])) cwa$moving_windows(windows)
if (!is.null(thresholds[1])) cwa$get_events(thresholds)
@@ -76,6 +76,11 @@ read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL,
#' @param time_column What should be used for the time axis. If "t_firstjan" is chosen,
#' the time is given in days relative to the first of January in the first year.
#' @param add Should we add to an existing plot?
+#' @param threshold_factor The factor by which the data have to be lower than the maximum
+#' in order to get thinned for plotting (see next argument).
+#' @param thin_low If an integer greater than 1, the data close to zero (smaller than
+#' 1/threshold_factor of the maximum) in the series will be thinned by this factor
+#' in order to decrease the amount of data that is included in the plots
#' @param total Should the total concentration in water be plotted, including substance sorbed
#' to suspended matter?
#' @param LC_TIME Specification of the locale used to format dates
@@ -84,17 +89,36 @@ read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL,
#' @author Johannes Ranke
#' @examples
#' H_sw_D4_pond <- read.TOXSWA_cwa("00001p_pa.cwa",
-#' basedir = "SwashProjects/project_H_sw/TOXSWA",
-#' zipfile = system.file("testdata/SwashProjects.zip",
-#' package = "pfm"))
+#' basedir = "SwashProjects/project_H_sw/TOXSWA",
+#' zipfile = system.file("testdata/SwashProjects.zip", package = "pfm"))
#' plot(H_sw_D4_pond)
+#' plot(H_sw_D4_pond, time_column = "t")
+#' plot(H_sw_D4_pond, time_column = "t_firstjan")
+#' plot(H_sw_D4_pond, time_column = "t_rel_to_max")
+#'
+#' H_sw_R1_stream <- read.TOXSWA_cwa("00003s_pa.cwa",
+#' basedir = "SwashProjects/project_H_sw/TOXSWA",
+#' zipfile = system.file("testdata/SwashProjects.zip", package = "pfm"))
+#' plot(H_sw_R1_stream, time_column = "t_rel_to_max")
plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t_rel_to_max"),
xlab = "default", ylab = "default",
add = FALSE,
+ threshold_factor = 1000, thin_low = 1,
total = FALSE, LC_TIME = "C", ...)
{
time_column = match.arg(time_column)
cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")
+ plot_data <- x$cwas[c(time_column, cwa_column)]
+
+ # Thin out the data that are less than 1/1000 of the maximum
+ names(plot_data) <- c("time", "cwa")
+ threshold <- max(plot_data$cwa) / threshold_factor
+ plot_data_low <- subset(plot_data, cwa < threshold)
+ plot_data_high <- subset(plot_data, cwa >= threshold)
+ plot_data_low_thin <- plot_data_low[seq(from = 1, to = nrow(plot_data_low), by = thin_low), ]
+ plot_data_combined <- rbind(plot_data_low_thin, plot_data_high)
+ plot_data <- plot_data_combined[order(plot_data_combined$time), ]
+
lct <- Sys.getlocale("LC_TIME")
tmp <- Sys.setlocale("LC_TIME", LC_TIME)
if (identical(xlab, "default")) {
@@ -102,21 +126,21 @@ plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t
datetime = "Time",
t = "Time [days]",
t_firstjan = "Time since first of January [days]",
- t_rel_to_max = "Time relative to maximum concentration [days]")
+ t_rel_to_max = "Time relative to maximum concentration [days]")
}
if (identical(ylab, "default")) {
ylab = paste( ifelse(total, "Total concentration", "Concentration"), "[\u03bcg/L]")
}
if (add) {
- lines(x$cwas[c(time_column, cwa_column)], xlab = xlab, ylab = ylab, ...)
+ lines(plot_data, xlab = xlab, ylab = ylab, ...)
} else{
if (time_column == "datetime") {
- plot(x$cwas$datetime, x$cwas[[cwa_column]], type = "l",
+ plot(plot_data, type = "l",
xlab = xlab, ylab = ylab, xaxt = "n", ...)
seq_x <- seq(min(x$cwas$datetime), max(x$cwas$datetime), by = "quarter")
axis(1, at = seq_x, labels = format(seq_x, "%b %Y"))
} else {
- plot(x$cwas[c(time_column, cwa_column)], type = "l",
+ plot(plot_data, type = "l",
xlab = xlab, ylab = ylab, ...)
}
}
@@ -138,14 +162,14 @@ plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t
#' @field cwas Dataframe holding the concentrations.
#' @field events List of dataframes holding the event statistics for each threshold.
#' @field windows Matrix of maximum time weighted average concentrations (TWAC_max)
-#' and areas under the curve in µg/day * h (AUC_max_h) or µg/day * d (AUC_max_d)
+#' and areas under the curve in µg/day * h (AUC_max_h) or µg/day * d (AUC_max_d)
#' for the requested moving window sizes in days.
#' @section Methods:
#' \describe{
#' \item{\code{get_events(threshold, total = FALSE)}}{
#' Populate a datataframe with event information for the specified threshold value
#' in µg/L. If \code{total = TRUE}, the total concentration including the amount
-#' adsorbed to suspended matter will be used. The resulting dataframe is stored in the
+#' adsorbed to suspended matter will be used. The resulting dataframe is stored in the
#' \code{events} field of the object.
#' }
#' \item{\code{moving_windows(windows, total = FALSE)}}{
@@ -174,7 +198,7 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa",
cwas = NULL,
windows = NULL,
events = list(),
- initialize = function(filename, basedir, zipfile = NULL,
+ initialize = function(filename, basedir, zipfile = NULL,
segment = "last", substance = "parent", total = FALSE) {
self$filename <- filename
self$basedir <- basedir
@@ -184,7 +208,7 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa",
} else {
try(file_connection <- file(file.path(basedir, filename), "rt"))
}
-
+
if (grepl(".cwa$", filename)) {
# cwa file from FOCUS TOXSWA 3 (TOXSWA 2.x.y)
@@ -204,15 +228,15 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa",
if (segment == "last") segment = max(available_segments)
if (!segment %in% available_segments) stop("Invalid segment specified")
self$segment <- segment
- cwa <- subset(cwa_all_segments, segment == self$segment,
+ cwa <- subset(cwa_all_segments, segment == self$segment,
c("datetime", "t", "segment", "cwa", "cwa_tot"))
lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C")
cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%H:%M", tz = "UTC")
Sys.setlocale("LC_TIME", lct)
- startyear = format(cwa$datetime[1], "%Y")
+ startyear = format(cwa$datetime[1], "%Y")
firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d",
tz = "UTC")
- cwa$t_firstjan <- as.numeric(difftime(cwa$datetime,
+ cwa$t_firstjan <- as.numeric(difftime(cwa$datetime,
firstjan, units = "days"))
t_max = cwa[which.max(cwa$cwa), "t"]
@@ -221,7 +245,7 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa",
cwa$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000
self$cwas <- cwa[c("datetime", "t", "t_firstjan",
"t_rel_to_max",
- "cwa_mug_per_L",
+ "cwa_mug_per_L",
"cwa_tot_mug_per_L")]
} else {
stop("Could not read ", filename)
@@ -241,7 +265,7 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa",
if (!substance %in% c("parent", substances)) {
stop("No data for substance ", substance, " present in the .out file.")
}
-
+
# Generate field name for the concentrations at end of hour for the
# substance of interest
if (substance == "parent") {
@@ -250,9 +274,9 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa",
cwa_string = paste0("ConLiqWatLayCur_", substance)
}
- cwa_lines <- grep(cwa_string, outfile, value = TRUE)
+ cwa_lines <- grep(cwa_string, outfile, value = TRUE)
- cwa_all_segments <- read_fwf(paste(cwa_lines, collapse = "\n"),
+ cwa_all_segments <- read_fwf(paste(cwa_lines, collapse = "\n"),
fwf_empty(paste(tail(cwa_lines), collapse = "\n")))
@@ -269,26 +293,26 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa",
# Append time "-00h00" to datetime if there is no time (only 11 characters)
# The fact that the time is missing at 00h00 was reported to Mark
- # Liedekerke, Wim Beltman, Paulien Adriaanse, and Chris Lythgo
- # on 14 December 2016
- cwa <- within(cwa,
+ # Liedekerke, Wim Beltman, Paulien Adriaanse, and Chris Lythgo
+ # on 14 December 2016
+ cwa <- within(cwa,
datetime <- ifelse(nchar(datetime) == 11,
paste0(datetime, "-00h00"),
datetime))
if (total) {
cwa_tot_lines <- outfile[grep("ConSysWatLay_", outfile)] # hourly total conc.
- cwa_tot_all_segments <- read.table(text = cwa_lines)
+ cwa_tot_all_segments <- read.table(text = cwa_lines)
cwa$cwa_tot = cwa_tot_all_segments[[3 + segment]]
}
lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C")
cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%Hh%M", tz = "UTC")
Sys.setlocale("LC_TIME", lct)
- startyear = format(cwa$datetime[1], "%Y")
- firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d",
+ startyear = format(cwa$datetime[1], "%Y")
+ firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d",
tz = "UTC")
- cwa$t_firstjan <- as.numeric(difftime(cwa$datetime,
+ cwa$t_firstjan <- as.numeric(difftime(cwa$datetime,
firstjan, units = "days"))
t_max = cwa[which.max(cwa$cwa), "t"]
cwa$t_rel_to_max = cwa$t - t_max
@@ -316,11 +340,11 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa",
for (i in seq_along(windows)) {
window_size = windows[i]
filter_size = window_size * 24
- max_TWAC = max(filter(self$cwas[cwa_column], rep(1/filter_size, filter_size),
+ max_TWAC = max(filter(self$cwas[cwa_column], rep(1/filter_size, filter_size),
"convolution"), na.rm = TRUE)
max_AUC_h = max_TWAC * filter_size
max_AUC_d = max_TWAC * window_size
-
+
self$windows[i, -1] = c(max_TWAC, max_AUC_h, max_AUC_d)
}
invisible(self)
@@ -330,8 +354,8 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa",
stop("You need to specify at least one threshold concentration in \u03bcg/L")
}
for (threshold in thresholds) {
- events = data.frame(t_start = numeric(), cwa_max = numeric(),
- duration = numeric(), pre_interval = numeric(),
+ events = data.frame(t_start = numeric(), cwa_max = numeric(),
+ duration = numeric(), pre_interval = numeric(),
AUC_h = numeric(), AUC_d = numeric())
cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")
@@ -364,10 +388,10 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa",
events[event_nr, ] = c(event_start, event_max, event_length,
pre_interval, event_AUC_h, event_AUC_d)
event = FALSE
- }
+ }
}
}
-
+
self$events[[as.character(threshold)]] <- events
}
invisible(self)

Contact - Imprint