From 0e1d517d4d6351f2d43ab8636363e73d8b8cf677 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Sat, 4 Nov 2017 11:48:08 +0100 Subject: Option to thin low TOXSWA PECsw data for plotting to reduce the file size of plots e.g. as PDF files --- R/TOXSWA_cwa.R | 96 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 36 deletions(-) (limited to 'R') diff --git a/R/TOXSWA_cwa.R b/R/TOXSWA_cwa.R index 2985e1f..3d52f31 100644 --- a/R/TOXSWA_cwa.R +++ b/R/TOXSWA_cwa.R @@ -27,9 +27,9 @@ #' out file (FOCUS TOXSWA 4, i.e. TOXSWA 4.4.2 or similar). #' @param basedir The path to the directory where the cwa file resides. #' @param zipfile Optional path to a zip file containing the cwa file. -#' @param segment The segment for which the data should be read. Either "last", or +#' @param segment The segment for which the data should be read. Either "last", or #' the segment number. -#' @param total Set this to TRUE in order to read total concentrations as well. This is +#' @param total Set this to TRUE in order to read total concentrations as well. This is #' only necessary for .out files as generated by TOXSWA 4.4.2 or similar, not for .cwa #' files. For .cwa files, the total concentration is always read as well. #' @param substance For TOXSWA 4 .out files, the default value "parent" leads @@ -38,7 +38,7 @@ #' @param windows Numeric vector of width of moving windows in days, for calculating #' maximum time weighted average concentrations and areas under the curve. #' @param thresholds Numeric vector of threshold concentrations in µg/L for -#' generating event statistics. +#' generating event statistics. #' @importFrom readr read_fwf fwf_empty #' @return An instance of an R6 object of class #' \code{\link{TOXSWA_cwa}}. @@ -49,13 +49,13 @@ #' basedir = "SwashProjects/project_H_sw/TOXSWA", #' zipfile = system.file("testdata/SwashProjects.zip", #' package = "pfm")) -read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL, +read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL, segment = "last", substance = "parent", total = FALSE, windows = NULL, thresholds = NULL) { if (!missing(filename)) { - cwa <- TOXSWA_cwa$new(filename, basedir, zipfile, + cwa <- TOXSWA_cwa$new(filename, basedir, zipfile, substance = substance, total = total) if (!is.null(windows[1])) cwa$moving_windows(windows) if (!is.null(thresholds[1])) cwa$get_events(thresholds) @@ -76,6 +76,11 @@ read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL, #' @param time_column What should be used for the time axis. If "t_firstjan" is chosen, #' the time is given in days relative to the first of January in the first year. #' @param add Should we add to an existing plot? +#' @param threshold_factor The factor by which the data have to be lower than the maximum +#' in order to get thinned for plotting (see next argument). +#' @param thin_low If an integer greater than 1, the data close to zero (smaller than +#' 1/threshold_factor of the maximum) in the series will be thinned by this factor +#' in order to decrease the amount of data that is included in the plots #' @param total Should the total concentration in water be plotted, including substance sorbed #' to suspended matter? #' @param LC_TIME Specification of the locale used to format dates @@ -84,17 +89,36 @@ read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL, #' @author Johannes Ranke #' @examples #' H_sw_D4_pond <- read.TOXSWA_cwa("00001p_pa.cwa", -#' basedir = "SwashProjects/project_H_sw/TOXSWA", -#' zipfile = system.file("testdata/SwashProjects.zip", -#' package = "pfm")) +#' basedir = "SwashProjects/project_H_sw/TOXSWA", +#' zipfile = system.file("testdata/SwashProjects.zip", package = "pfm")) #' plot(H_sw_D4_pond) +#' plot(H_sw_D4_pond, time_column = "t") +#' plot(H_sw_D4_pond, time_column = "t_firstjan") +#' plot(H_sw_D4_pond, time_column = "t_rel_to_max") +#' +#' H_sw_R1_stream <- read.TOXSWA_cwa("00003s_pa.cwa", +#' basedir = "SwashProjects/project_H_sw/TOXSWA", +#' zipfile = system.file("testdata/SwashProjects.zip", package = "pfm")) +#' plot(H_sw_R1_stream, time_column = "t_rel_to_max") plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t_rel_to_max"), xlab = "default", ylab = "default", add = FALSE, + threshold_factor = 1000, thin_low = 1, total = FALSE, LC_TIME = "C", ...) { time_column = match.arg(time_column) cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L") + plot_data <- x$cwas[c(time_column, cwa_column)] + + # Thin out the data that are less than 1/1000 of the maximum + names(plot_data) <- c("time", "cwa") + threshold <- max(plot_data$cwa) / threshold_factor + plot_data_low <- subset(plot_data, cwa < threshold) + plot_data_high <- subset(plot_data, cwa >= threshold) + plot_data_low_thin <- plot_data_low[seq(from = 1, to = nrow(plot_data_low), by = thin_low), ] + plot_data_combined <- rbind(plot_data_low_thin, plot_data_high) + plot_data <- plot_data_combined[order(plot_data_combined$time), ] + lct <- Sys.getlocale("LC_TIME") tmp <- Sys.setlocale("LC_TIME", LC_TIME) if (identical(xlab, "default")) { @@ -102,21 +126,21 @@ plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t datetime = "Time", t = "Time [days]", t_firstjan = "Time since first of January [days]", - t_rel_to_max = "Time relative to maximum concentration [days]") + t_rel_to_max = "Time relative to maximum concentration [days]") } if (identical(ylab, "default")) { ylab = paste( ifelse(total, "Total concentration", "Concentration"), "[\u03bcg/L]") } if (add) { - lines(x$cwas[c(time_column, cwa_column)], xlab = xlab, ylab = ylab, ...) + lines(plot_data, xlab = xlab, ylab = ylab, ...) } else{ if (time_column == "datetime") { - plot(x$cwas$datetime, x$cwas[[cwa_column]], type = "l", + plot(plot_data, type = "l", xlab = xlab, ylab = ylab, xaxt = "n", ...) seq_x <- seq(min(x$cwas$datetime), max(x$cwas$datetime), by = "quarter") axis(1, at = seq_x, labels = format(seq_x, "%b %Y")) } else { - plot(x$cwas[c(time_column, cwa_column)], type = "l", + plot(plot_data, type = "l", xlab = xlab, ylab = ylab, ...) } } @@ -138,14 +162,14 @@ plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t #' @field cwas Dataframe holding the concentrations. #' @field events List of dataframes holding the event statistics for each threshold. #' @field windows Matrix of maximum time weighted average concentrations (TWAC_max) -#' and areas under the curve in µg/day * h (AUC_max_h) or µg/day * d (AUC_max_d) +#' and areas under the curve in µg/day * h (AUC_max_h) or µg/day * d (AUC_max_d) #' for the requested moving window sizes in days. #' @section Methods: #' \describe{ #' \item{\code{get_events(threshold, total = FALSE)}}{ #' Populate a datataframe with event information for the specified threshold value #' in µg/L. If \code{total = TRUE}, the total concentration including the amount -#' adsorbed to suspended matter will be used. The resulting dataframe is stored in the +#' adsorbed to suspended matter will be used. The resulting dataframe is stored in the #' \code{events} field of the object. #' } #' \item{\code{moving_windows(windows, total = FALSE)}}{ @@ -174,7 +198,7 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", cwas = NULL, windows = NULL, events = list(), - initialize = function(filename, basedir, zipfile = NULL, + initialize = function(filename, basedir, zipfile = NULL, segment = "last", substance = "parent", total = FALSE) { self$filename <- filename self$basedir <- basedir @@ -184,7 +208,7 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", } else { try(file_connection <- file(file.path(basedir, filename), "rt")) } - + if (grepl(".cwa$", filename)) { # cwa file from FOCUS TOXSWA 3 (TOXSWA 2.x.y) @@ -204,15 +228,15 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", if (segment == "last") segment = max(available_segments) if (!segment %in% available_segments) stop("Invalid segment specified") self$segment <- segment - cwa <- subset(cwa_all_segments, segment == self$segment, + cwa <- subset(cwa_all_segments, segment == self$segment, c("datetime", "t", "segment", "cwa", "cwa_tot")) lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C") cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%H:%M", tz = "UTC") Sys.setlocale("LC_TIME", lct) - startyear = format(cwa$datetime[1], "%Y") + startyear = format(cwa$datetime[1], "%Y") firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d", tz = "UTC") - cwa$t_firstjan <- as.numeric(difftime(cwa$datetime, + cwa$t_firstjan <- as.numeric(difftime(cwa$datetime, firstjan, units = "days")) t_max = cwa[which.max(cwa$cwa), "t"] @@ -221,7 +245,7 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", cwa$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000 self$cwas <- cwa[c("datetime", "t", "t_firstjan", "t_rel_to_max", - "cwa_mug_per_L", + "cwa_mug_per_L", "cwa_tot_mug_per_L")] } else { stop("Could not read ", filename) @@ -241,7 +265,7 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", if (!substance %in% c("parent", substances)) { stop("No data for substance ", substance, " present in the .out file.") } - + # Generate field name for the concentrations at end of hour for the # substance of interest if (substance == "parent") { @@ -250,9 +274,9 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", cwa_string = paste0("ConLiqWatLayCur_", substance) } - cwa_lines <- grep(cwa_string, outfile, value = TRUE) + cwa_lines <- grep(cwa_string, outfile, value = TRUE) - cwa_all_segments <- read_fwf(paste(cwa_lines, collapse = "\n"), + cwa_all_segments <- read_fwf(paste(cwa_lines, collapse = "\n"), fwf_empty(paste(tail(cwa_lines), collapse = "\n"))) @@ -269,26 +293,26 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", # Append time "-00h00" to datetime if there is no time (only 11 characters) # The fact that the time is missing at 00h00 was reported to Mark - # Liedekerke, Wim Beltman, Paulien Adriaanse, and Chris Lythgo - # on 14 December 2016 - cwa <- within(cwa, + # Liedekerke, Wim Beltman, Paulien Adriaanse, and Chris Lythgo + # on 14 December 2016 + cwa <- within(cwa, datetime <- ifelse(nchar(datetime) == 11, paste0(datetime, "-00h00"), datetime)) if (total) { cwa_tot_lines <- outfile[grep("ConSysWatLay_", outfile)] # hourly total conc. - cwa_tot_all_segments <- read.table(text = cwa_lines) + cwa_tot_all_segments <- read.table(text = cwa_lines) cwa$cwa_tot = cwa_tot_all_segments[[3 + segment]] } lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C") cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%Hh%M", tz = "UTC") Sys.setlocale("LC_TIME", lct) - startyear = format(cwa$datetime[1], "%Y") - firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d", + startyear = format(cwa$datetime[1], "%Y") + firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d", tz = "UTC") - cwa$t_firstjan <- as.numeric(difftime(cwa$datetime, + cwa$t_firstjan <- as.numeric(difftime(cwa$datetime, firstjan, units = "days")) t_max = cwa[which.max(cwa$cwa), "t"] cwa$t_rel_to_max = cwa$t - t_max @@ -316,11 +340,11 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", for (i in seq_along(windows)) { window_size = windows[i] filter_size = window_size * 24 - max_TWAC = max(filter(self$cwas[cwa_column], rep(1/filter_size, filter_size), + max_TWAC = max(filter(self$cwas[cwa_column], rep(1/filter_size, filter_size), "convolution"), na.rm = TRUE) max_AUC_h = max_TWAC * filter_size max_AUC_d = max_TWAC * window_size - + self$windows[i, -1] = c(max_TWAC, max_AUC_h, max_AUC_d) } invisible(self) @@ -330,8 +354,8 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", stop("You need to specify at least one threshold concentration in \u03bcg/L") } for (threshold in thresholds) { - events = data.frame(t_start = numeric(), cwa_max = numeric(), - duration = numeric(), pre_interval = numeric(), + events = data.frame(t_start = numeric(), cwa_max = numeric(), + duration = numeric(), pre_interval = numeric(), AUC_h = numeric(), AUC_d = numeric()) cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L") @@ -364,10 +388,10 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", events[event_nr, ] = c(event_start, event_max, event_length, pre_interval, event_AUC_h, event_AUC_d) event = FALSE - } + } } } - + self$events[[as.character(threshold)]] <- events } invisible(self) -- cgit v1.2.1