From 0e1d517d4d6351f2d43ab8636363e73d8b8cf677 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Sat, 4 Nov 2017 11:48:08 +0100 Subject: Option to thin low TOXSWA PECsw data for plotting to reduce the file size of plots e.g. as PDF files --- ChangeLog | 6 ++++ DESCRIPTION | 2 +- R/TOXSWA_cwa.R | 96 +++++++++++++++++++++++++++++++------------------- build.log | 4 +++ man/TOXSWA_cwa.Rd | 6 ++-- man/plot.TOXSWA_cwa.Rd | 23 +++++++++--- man/read.TOXSWA_cwa.Rd | 6 ++-- 7 files changed, 96 insertions(+), 47 deletions(-) diff --git a/ChangeLog b/ChangeLog index e70d9ec..02a9284 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +commit eb4465035dd44907eae3ea0340221316b7cfca12 +Author: Johannes Ranke +Date: 2017-11-03 11:18:10 +0100 + + Also return runoff percentages + commit 06b528f0c19ca9f7a311612c0e9ae80c0d0c1d3f Author: Johannes Ranke Date: 2017-10-27 18:15:29 +0200 diff --git a/DESCRIPTION b/DESCRIPTION index bf18091..b8c99df 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: pfm Type: Package Title: Utilities for Pesticide Fate Modelling Version: 0.4-5 -Date: 2017-10-28 +Date: 2017-11-04 Authors@R: person("Johannes Ranke", email = "jranke@uni-bremen.de", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0003-4371-6538")) diff --git a/R/TOXSWA_cwa.R b/R/TOXSWA_cwa.R index 2985e1f..3d52f31 100644 --- a/R/TOXSWA_cwa.R +++ b/R/TOXSWA_cwa.R @@ -27,9 +27,9 @@ #' out file (FOCUS TOXSWA 4, i.e. TOXSWA 4.4.2 or similar). #' @param basedir The path to the directory where the cwa file resides. #' @param zipfile Optional path to a zip file containing the cwa file. -#' @param segment The segment for which the data should be read. Either "last", or +#' @param segment The segment for which the data should be read. Either "last", or #' the segment number. -#' @param total Set this to TRUE in order to read total concentrations as well. This is +#' @param total Set this to TRUE in order to read total concentrations as well. This is #' only necessary for .out files as generated by TOXSWA 4.4.2 or similar, not for .cwa #' files. For .cwa files, the total concentration is always read as well. #' @param substance For TOXSWA 4 .out files, the default value "parent" leads @@ -38,7 +38,7 @@ #' @param windows Numeric vector of width of moving windows in days, for calculating #' maximum time weighted average concentrations and areas under the curve. #' @param thresholds Numeric vector of threshold concentrations in µg/L for -#' generating event statistics. +#' generating event statistics. #' @importFrom readr read_fwf fwf_empty #' @return An instance of an R6 object of class #' \code{\link{TOXSWA_cwa}}. @@ -49,13 +49,13 @@ #' basedir = "SwashProjects/project_H_sw/TOXSWA", #' zipfile = system.file("testdata/SwashProjects.zip", #' package = "pfm")) -read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL, +read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL, segment = "last", substance = "parent", total = FALSE, windows = NULL, thresholds = NULL) { if (!missing(filename)) { - cwa <- TOXSWA_cwa$new(filename, basedir, zipfile, + cwa <- TOXSWA_cwa$new(filename, basedir, zipfile, substance = substance, total = total) if (!is.null(windows[1])) cwa$moving_windows(windows) if (!is.null(thresholds[1])) cwa$get_events(thresholds) @@ -76,6 +76,11 @@ read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL, #' @param time_column What should be used for the time axis. If "t_firstjan" is chosen, #' the time is given in days relative to the first of January in the first year. #' @param add Should we add to an existing plot? +#' @param threshold_factor The factor by which the data have to be lower than the maximum +#' in order to get thinned for plotting (see next argument). +#' @param thin_low If an integer greater than 1, the data close to zero (smaller than +#' 1/threshold_factor of the maximum) in the series will be thinned by this factor +#' in order to decrease the amount of data that is included in the plots #' @param total Should the total concentration in water be plotted, including substance sorbed #' to suspended matter? #' @param LC_TIME Specification of the locale used to format dates @@ -84,17 +89,36 @@ read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL, #' @author Johannes Ranke #' @examples #' H_sw_D4_pond <- read.TOXSWA_cwa("00001p_pa.cwa", -#' basedir = "SwashProjects/project_H_sw/TOXSWA", -#' zipfile = system.file("testdata/SwashProjects.zip", -#' package = "pfm")) +#' basedir = "SwashProjects/project_H_sw/TOXSWA", +#' zipfile = system.file("testdata/SwashProjects.zip", package = "pfm")) #' plot(H_sw_D4_pond) +#' plot(H_sw_D4_pond, time_column = "t") +#' plot(H_sw_D4_pond, time_column = "t_firstjan") +#' plot(H_sw_D4_pond, time_column = "t_rel_to_max") +#' +#' H_sw_R1_stream <- read.TOXSWA_cwa("00003s_pa.cwa", +#' basedir = "SwashProjects/project_H_sw/TOXSWA", +#' zipfile = system.file("testdata/SwashProjects.zip", package = "pfm")) +#' plot(H_sw_R1_stream, time_column = "t_rel_to_max") plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t_rel_to_max"), xlab = "default", ylab = "default", add = FALSE, + threshold_factor = 1000, thin_low = 1, total = FALSE, LC_TIME = "C", ...) { time_column = match.arg(time_column) cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L") + plot_data <- x$cwas[c(time_column, cwa_column)] + + # Thin out the data that are less than 1/1000 of the maximum + names(plot_data) <- c("time", "cwa") + threshold <- max(plot_data$cwa) / threshold_factor + plot_data_low <- subset(plot_data, cwa < threshold) + plot_data_high <- subset(plot_data, cwa >= threshold) + plot_data_low_thin <- plot_data_low[seq(from = 1, to = nrow(plot_data_low), by = thin_low), ] + plot_data_combined <- rbind(plot_data_low_thin, plot_data_high) + plot_data <- plot_data_combined[order(plot_data_combined$time), ] + lct <- Sys.getlocale("LC_TIME") tmp <- Sys.setlocale("LC_TIME", LC_TIME) if (identical(xlab, "default")) { @@ -102,21 +126,21 @@ plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t datetime = "Time", t = "Time [days]", t_firstjan = "Time since first of January [days]", - t_rel_to_max = "Time relative to maximum concentration [days]") + t_rel_to_max = "Time relative to maximum concentration [days]") } if (identical(ylab, "default")) { ylab = paste( ifelse(total, "Total concentration", "Concentration"), "[\u03bcg/L]") } if (add) { - lines(x$cwas[c(time_column, cwa_column)], xlab = xlab, ylab = ylab, ...) + lines(plot_data, xlab = xlab, ylab = ylab, ...) } else{ if (time_column == "datetime") { - plot(x$cwas$datetime, x$cwas[[cwa_column]], type = "l", + plot(plot_data, type = "l", xlab = xlab, ylab = ylab, xaxt = "n", ...) seq_x <- seq(min(x$cwas$datetime), max(x$cwas$datetime), by = "quarter") axis(1, at = seq_x, labels = format(seq_x, "%b %Y")) } else { - plot(x$cwas[c(time_column, cwa_column)], type = "l", + plot(plot_data, type = "l", xlab = xlab, ylab = ylab, ...) } } @@ -138,14 +162,14 @@ plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t #' @field cwas Dataframe holding the concentrations. #' @field events List of dataframes holding the event statistics for each threshold. #' @field windows Matrix of maximum time weighted average concentrations (TWAC_max) -#' and areas under the curve in µg/day * h (AUC_max_h) or µg/day * d (AUC_max_d) +#' and areas under the curve in µg/day * h (AUC_max_h) or µg/day * d (AUC_max_d) #' for the requested moving window sizes in days. #' @section Methods: #' \describe{ #' \item{\code{get_events(threshold, total = FALSE)}}{ #' Populate a datataframe with event information for the specified threshold value #' in µg/L. If \code{total = TRUE}, the total concentration including the amount -#' adsorbed to suspended matter will be used. The resulting dataframe is stored in the +#' adsorbed to suspended matter will be used. The resulting dataframe is stored in the #' \code{events} field of the object. #' } #' \item{\code{moving_windows(windows, total = FALSE)}}{ @@ -174,7 +198,7 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", cwas = NULL, windows = NULL, events = list(), - initialize = function(filename, basedir, zipfile = NULL, + initialize = function(filename, basedir, zipfile = NULL, segment = "last", substance = "parent", total = FALSE) { self$filename <- filename self$basedir <- basedir @@ -184,7 +208,7 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", } else { try(file_connection <- file(file.path(basedir, filename), "rt")) } - + if (grepl(".cwa$", filename)) { # cwa file from FOCUS TOXSWA 3 (TOXSWA 2.x.y) @@ -204,15 +228,15 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", if (segment == "last") segment = max(available_segments) if (!segment %in% available_segments) stop("Invalid segment specified") self$segment <- segment - cwa <- subset(cwa_all_segments, segment == self$segment, + cwa <- subset(cwa_all_segments, segment == self$segment, c("datetime", "t", "segment", "cwa", "cwa_tot")) lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C") cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%H:%M", tz = "UTC") Sys.setlocale("LC_TIME", lct) - startyear = format(cwa$datetime[1], "%Y") + startyear = format(cwa$datetime[1], "%Y") firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d", tz = "UTC") - cwa$t_firstjan <- as.numeric(difftime(cwa$datetime, + cwa$t_firstjan <- as.numeric(difftime(cwa$datetime, firstjan, units = "days")) t_max = cwa[which.max(cwa$cwa), "t"] @@ -221,7 +245,7 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", cwa$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000 self$cwas <- cwa[c("datetime", "t", "t_firstjan", "t_rel_to_max", - "cwa_mug_per_L", + "cwa_mug_per_L", "cwa_tot_mug_per_L")] } else { stop("Could not read ", filename) @@ -241,7 +265,7 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", if (!substance %in% c("parent", substances)) { stop("No data for substance ", substance, " present in the .out file.") } - + # Generate field name for the concentrations at end of hour for the # substance of interest if (substance == "parent") { @@ -250,9 +274,9 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", cwa_string = paste0("ConLiqWatLayCur_", substance) } - cwa_lines <- grep(cwa_string, outfile, value = TRUE) + cwa_lines <- grep(cwa_string, outfile, value = TRUE) - cwa_all_segments <- read_fwf(paste(cwa_lines, collapse = "\n"), + cwa_all_segments <- read_fwf(paste(cwa_lines, collapse = "\n"), fwf_empty(paste(tail(cwa_lines), collapse = "\n"))) @@ -269,26 +293,26 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", # Append time "-00h00" to datetime if there is no time (only 11 characters) # The fact that the time is missing at 00h00 was reported to Mark - # Liedekerke, Wim Beltman, Paulien Adriaanse, and Chris Lythgo - # on 14 December 2016 - cwa <- within(cwa, + # Liedekerke, Wim Beltman, Paulien Adriaanse, and Chris Lythgo + # on 14 December 2016 + cwa <- within(cwa, datetime <- ifelse(nchar(datetime) == 11, paste0(datetime, "-00h00"), datetime)) if (total) { cwa_tot_lines <- outfile[grep("ConSysWatLay_", outfile)] # hourly total conc. - cwa_tot_all_segments <- read.table(text = cwa_lines) + cwa_tot_all_segments <- read.table(text = cwa_lines) cwa$cwa_tot = cwa_tot_all_segments[[3 + segment]] } lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C") cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%Hh%M", tz = "UTC") Sys.setlocale("LC_TIME", lct) - startyear = format(cwa$datetime[1], "%Y") - firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d", + startyear = format(cwa$datetime[1], "%Y") + firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d", tz = "UTC") - cwa$t_firstjan <- as.numeric(difftime(cwa$datetime, + cwa$t_firstjan <- as.numeric(difftime(cwa$datetime, firstjan, units = "days")) t_max = cwa[which.max(cwa$cwa), "t"] cwa$t_rel_to_max = cwa$t - t_max @@ -316,11 +340,11 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", for (i in seq_along(windows)) { window_size = windows[i] filter_size = window_size * 24 - max_TWAC = max(filter(self$cwas[cwa_column], rep(1/filter_size, filter_size), + max_TWAC = max(filter(self$cwas[cwa_column], rep(1/filter_size, filter_size), "convolution"), na.rm = TRUE) max_AUC_h = max_TWAC * filter_size max_AUC_d = max_TWAC * window_size - + self$windows[i, -1] = c(max_TWAC, max_AUC_h, max_AUC_d) } invisible(self) @@ -330,8 +354,8 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", stop("You need to specify at least one threshold concentration in \u03bcg/L") } for (threshold in thresholds) { - events = data.frame(t_start = numeric(), cwa_max = numeric(), - duration = numeric(), pre_interval = numeric(), + events = data.frame(t_start = numeric(), cwa_max = numeric(), + duration = numeric(), pre_interval = numeric(), AUC_h = numeric(), AUC_d = numeric()) cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L") @@ -364,10 +388,10 @@ TOXSWA_cwa <- R6Class("TOXSWA_cwa", events[event_nr, ] = c(event_start, event_max, event_length, pre_interval, event_AUC_h, event_AUC_d) event = FALSE - } + } } } - + self$events[[as.character(threshold)]] <- events } invisible(self) diff --git a/build.log b/build.log index 95a265b..543134b 100644 --- a/build.log +++ b/build.log @@ -1,6 +1,10 @@ * checking for file ‘./DESCRIPTION’ ... OK * preparing ‘pfm’: * checking DESCRIPTION meta-information ... OK +Warnung: /tmp/RtmpPy0RQl/Rbuild17321b2b74b/pfm/man/TOXSWA_cwa.Rd:29: unknown macro '\u00B5g' +Warnung: /tmp/RtmpPy0RQl/Rbuild17321b2b74b/pfm/man/TOXSWA_cwa.Rd:29: unknown macro '\u00B5g' +Warnung: /tmp/RtmpPy0RQl/Rbuild17321b2b74b/pfm/man/TOXSWA_cwa.Rd:38: unknown macro '\u00B5g' +Warnung: /tmp/RtmpPy0RQl/Rbuild17321b2b74b/pfm/man/read.TOXSWA_cwa.Rd:33: unknown macro '\u00B5g' * checking for LF line-endings in source and make files and shell scripts * checking for empty or unneeded directories * looking to see if a ‘data/datalist’ file should be added diff --git a/man/TOXSWA_cwa.Rd b/man/TOXSWA_cwa.Rd index 7935617..e417094 100644 --- a/man/TOXSWA_cwa.Rd +++ b/man/TOXSWA_cwa.Rd @@ -26,7 +26,7 @@ Usually, an instance of this class will be generated by \code{\link{read.TOXSWA_ \item{\code{events}}{List of dataframes holding the event statistics for each threshold.} \item{\code{windows}}{Matrix of maximum time weighted average concentrations (TWAC_max) -and areas under the curve in µg/day * h (AUC_max_h) or µg/day * d (AUC_max_d) +and areas under the curve in \u00B5g/day * h (AUC_max_h) or \u00B5g/day * d (AUC_max_d) for the requested moving window sizes in days.} }} @@ -35,8 +35,8 @@ for the requested moving window sizes in days.} \describe{ \item{\code{get_events(threshold, total = FALSE)}}{ Populate a datataframe with event information for the specified threshold value - in µg/L. If \code{total = TRUE}, the total concentration including the amount - adsorbed to suspended matter will be used. The resulting dataframe is stored in the + in \u00B5g/L. If \code{total = TRUE}, the total concentration including the amount + adsorbed to suspended matter will be used. The resulting dataframe is stored in the \code{events} field of the object. } \item{\code{moving_windows(windows, total = FALSE)}}{ diff --git a/man/plot.TOXSWA_cwa.Rd b/man/plot.TOXSWA_cwa.Rd index 4e81f91..88e1f37 100644 --- a/man/plot.TOXSWA_cwa.Rd +++ b/man/plot.TOXSWA_cwa.Rd @@ -6,7 +6,8 @@ \usage{ \method{plot}{TOXSWA_cwa}(x, time_column = c("datetime", "t", "t_firstjan", "t_rel_to_max"), xlab = "default", ylab = "default", add = FALSE, - total = FALSE, LC_TIME = "C", ...) + threshold_factor = 1000, thin_low = 1, total = FALSE, LC_TIME = "C", + ...) } \arguments{ \item{x}{The TOXSWA_cwa object to be plotted.} @@ -18,6 +19,13 @@ the time is given in days relative to the first of January in the first year.} \item{add}{Should we add to an existing plot?} +\item{threshold_factor}{The factor by which the data have to be lower than the maximum +in order to get thinned for plotting (see next argument).} + +\item{thin_low}{If an integer greater than 1, the data close to zero (smaller than +1/threshold_factor of the maximum) in the series will be thinned by this factor +in order to decrease the amount of data that is included in the plots} + \item{total}{Should the total concentration in water be plotted, including substance sorbed to suspended matter?} @@ -31,10 +39,17 @@ segment of a TOXSWA surface water body. } \examples{ H_sw_D4_pond <- read.TOXSWA_cwa("00001p_pa.cwa", - basedir = "SwashProjects/project_H_sw/TOXSWA", - zipfile = system.file("testdata/SwashProjects.zip", - package = "pfm")) + basedir = "SwashProjects/project_H_sw/TOXSWA", + zipfile = system.file("testdata/SwashProjects.zip", package = "pfm")) plot(H_sw_D4_pond) +plot(H_sw_D4_pond, time_column = "t") +plot(H_sw_D4_pond, time_column = "t_firstjan") +plot(H_sw_D4_pond, time_column = "t_rel_to_max") + +H_sw_R1_stream <- read.TOXSWA_cwa("00003s_pa.cwa", + basedir = "SwashProjects/project_H_sw/TOXSWA", + zipfile = system.file("testdata/SwashProjects.zip", package = "pfm")) +plot(H_sw_R1_stream, time_column = "t_rel_to_max") } \author{ Johannes Ranke diff --git a/man/read.TOXSWA_cwa.Rd b/man/read.TOXSWA_cwa.Rd index f1d5908..28479f2 100644 --- a/man/read.TOXSWA_cwa.Rd +++ b/man/read.TOXSWA_cwa.Rd @@ -16,21 +16,21 @@ out file (FOCUS TOXSWA 4, i.e. TOXSWA 4.4.2 or similar).} \item{zipfile}{Optional path to a zip file containing the cwa file.} -\item{segment}{The segment for which the data should be read. Either "last", or +\item{segment}{The segment for which the data should be read. Either "last", or the segment number.} \item{substance}{For TOXSWA 4 .out files, the default value "parent" leads to reading concentrations of the parent compound. Alternatively, the substance of interested can be selected by its code name.} -\item{total}{Set this to TRUE in order to read total concentrations as well. This is +\item{total}{Set this to TRUE in order to read total concentrations as well. This is only necessary for .out files as generated by TOXSWA 4.4.2 or similar, not for .cwa files. For .cwa files, the total concentration is always read as well.} \item{windows}{Numeric vector of width of moving windows in days, for calculating maximum time weighted average concentrations and areas under the curve.} -\item{thresholds}{Numeric vector of threshold concentrations in µg/L for +\item{thresholds}{Numeric vector of threshold concentrations in \u00B5g/L for generating event statistics.} } \value{ -- cgit v1.2.1