R/TOXSWA_cwa.R


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427

if(getRversion() >= '2.15.1') utils::globalVariables(c("cwa"))

#' Read TOXSWA surface water concentrations
#'
#' Read TOXSWA hourly concentrations of a chemical substance in a specific
#' segment of a TOXSWA surface water body. Per default, the data for the last
#' segment are imported. As TOXSWA 4 reports the values at the end of the hour
#' (ConLiqWatLayCur) in its summary file, we use this value as well instead
#' of the hourly averages (ConLiqWatLay). In TOXSWA 5.5.3 this variable was
#' renamed to ConLiqWatLay in the out file.
#'
#' @param filename The filename of the cwa file (TOXSWA 2.x.y  or similar) or the
#'  out file when using FOCUS TOXSWA 4 (i.e. TOXSWA 4.4.2) or higher.
#' @param basedir The path to the directory where the cwa file resides.
#' @param zipfile Optional path to a zip file containing the cwa file.
#' @param segment The segment for which the data should be read. Either "last", or
#'   the segment number.
#' @param total Set this to TRUE in order to read total concentrations as well. This is
#'   only necessary for .out files as generated by TOXSWA 4.4.2 or similar, not for .cwa
#'   files. For .cwa files, the total concentration is always read as well.
#' @param substance For .out files, the default value "parent" leads
#'   to reading concentrations of the parent compound. Alternatively, the substance
#'   of interested can be selected by its code name.
#' @param windows Numeric vector of width of moving windows in days, for calculating
#'   maximum time weighted average concentrations and areas under the curve.
#' @param thresholds Numeric vector of threshold concentrations in µg/L for
#'   generating event statistics.
#' @importFrom readr read_fwf fwf_empty
#' @importFrom grDevices dev.cur
#' @return An instance of an R6 object of class
#' \code{\link{TOXSWA_cwa}}.
#' @export
#' @author Johannes Ranke
#' @examples
#' H_sw_D4_pond  <- read.TOXSWA_cwa("00001p_pa.cwa",
#'                                  basedir = "SwashProjects/project_H_sw/TOXSWA",
#'                                  zipfile = system.file("testdata/SwashProjects.zip",
#'                                                        package = "pfm"))
read.TOXSWA_cwa <- function(filename, basedir = ".", zipfile = NULL,
                            segment = "last", substance = "parent",
                            total = FALSE,
                            windows = NULL, thresholds = NULL)
{
  if (!missing(filename)) {
    cwa <- TOXSWA_cwa$new(filename, basedir, zipfile,
                          substance = substance, total = total)
    if (!is.null(windows[1])) cwa$moving_windows(windows)
    if (!is.null(thresholds[1])) cwa$get_events(thresholds)
    invisible(cwa)
  } else {
    message("You need to specify a filename for the cwa file to be read")
  }
}

#' Plot TOXSWA surface water concentrations
#'
#' Plot TOXSWA hourly concentrations of a chemical substance in a specific
#' segment of a TOXSWA surface water body.
#'
#' @import graphics
#' @param x The TOXSWA_cwa object to be plotted.
#' @param xlab,ylab Labels for x and y axis.
#' @param time_column What should be used for the time axis. If "t_firstjan" is chosen,
#'   the time is given in days relative to the first of January in the first year.
#' @param add Should we add to an existing plot?
#' @param threshold_factor The factor by which the data have to be lower than the maximum
#'   in order to get thinned for plotting (see next argument).
#' @param thin_low If an integer greater than 1, the data close to zero (smaller than
#'   1/threshold_factor of the maximum) in the series will be thinned by this factor
#'   in order to decrease the amount of data that is included in the plots
#' @param total Should the total concentration in water be plotted, including substance sorbed
#'   to suspended matter?
#' @param LC_TIME Specification of the locale used to format dates
#' @param ... Further arguments passed to \code{plot} if we are not adding to an existing plot
#' @export
#' @author Johannes Ranke
#' @examples
#' H_sw_D4_pond  <- read.TOXSWA_cwa("00001p_pa.cwa",
#'   basedir = "SwashProjects/project_H_sw/TOXSWA",
#'   zipfile = system.file("testdata/SwashProjects.zip", package = "pfm"))
#' plot(H_sw_D4_pond)
#' plot(H_sw_D4_pond, time_column = "t")
#' plot(H_sw_D4_pond, time_column = "t_firstjan")
#' plot(H_sw_D4_pond, time_column = "t_rel_to_max")
#'
#' H_sw_R1_stream  <- read.TOXSWA_cwa("00003s_pa.cwa",
#'   basedir = "SwashProjects/project_H_sw/TOXSWA",
#'   zipfile = system.file("testdata/SwashProjects.zip", package = "pfm"))
#' plot(H_sw_R1_stream, time_column = "t_rel_to_max")
plot.TOXSWA_cwa <- function(x, time_column = c("datetime", "t", "t_firstjan", "t_rel_to_max"),
                            xlab = "default", ylab = "default",
                            add = FALSE,
                            threshold_factor = 1000, thin_low = 1,
                            total = FALSE, LC_TIME = "C", ...)
{
  time_column = match.arg(time_column)
  cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")
  plot_data <- x$cwas[c(time_column, cwa_column)]

  # Thin out the data that are less than 1/1000 of the maximum
  names(plot_data) <- c("time", "cwa")
  threshold <- max(plot_data$cwa) / threshold_factor
  plot_data_low <- subset(plot_data, cwa < threshold)
  plot_data_high <- subset(plot_data, cwa >= threshold)
  plot_data_low_thin <- plot_data_low[seq(from = 1, to = nrow(plot_data_low), by = thin_low), ]
  plot_data_combined <- rbind(plot_data_low_thin, plot_data_high)
  plot_data <- plot_data_combined[order(plot_data_combined$time), ]

  lct <- Sys.getlocale("LC_TIME")
  tmp <- Sys.setlocale("LC_TIME", LC_TIME)
  if (identical(xlab, "default")) {
    xlab = switch(time_column,
                datetime = "Time",
                t = "Time [days]",
                t_firstjan = "Time since first of January [days]",
                t_rel_to_max = "Time relative to maximum concentration [days]")
  }
  if (identical(ylab, "default")) {
    # Use LateX if the current plotting device is tikz
    if (names(dev.cur()) == "tikz output") {
      ylab = paste( ifelse(total, "Total concentration", "Concentration"), "[$\\mu$g/L]")
    } else {
      conc_string <- ifelse(total, "Total concentration", "Concentration")
      ylab = bquote(.(conc_string) ~ group("[", mu*g/L, "]"))
    }
  }
  if (add) {
    lines(plot_data, xlab = xlab, ylab = ylab, ...)
  } else{
    if (time_column == "datetime") {
      plot(plot_data, type = "l",
           xlab = xlab, ylab = ylab, xaxt = "n", ...)
      seq_x <- seq(min(x$cwas$datetime), max(x$cwas$datetime), by = "quarter")
      axis(1, at = seq_x, labels = format(seq_x, "%b %Y"))
    } else {
      plot(plot_data, type = "l",
           xlab = xlab, ylab = ylab, ...)
    }
  }
  tmp <- Sys.setlocale("LC_TIME", lct)
}

#' R6 class for holding TOXSWA water concentration data and associated statistics
#'
#' @description An R6 class for holding TOXSWA water concentration (cwa) data
#' and some associated statistics.  like maximum moving window average
#' concentrations, and dataframes holding the events exceeding specified
#' thresholds.  Usually, an instance of this class will be generated
#' by \code{\link{read.TOXSWA_cwa}}.
#'
#' @export
#' @format An \code{\link{R6Class}} generator object.
#' @field filename Length one character vector holding the filename.
#' @field basedir Length one character vector holding the directory where the file came from.
#' @field zipfile If not null, giving the path to the zip file from which the file was read.
#' @field segment Length one integer, specifying for which segment the cwa data were read.
#' @field substance The TOXSWA name of the substance.
#' @field cwas Dataframe holding the concentrations.
#' @field events List of dataframes holding the event statistics for each threshold.
#' @field windows Matrix of maximum time weighted average concentrations (TWAC_max)
#'   and areas under the curve in µg/day * h (AUC_max_h) or µg/day * d (AUC_max_d)
#'   for the requested moving window sizes in days.
#' @examples
#' H_sw_R1_stream  <- read.TOXSWA_cwa("00003s_pa.cwa",
#'                                  basedir = "SwashProjects/project_H_sw/TOXSWA",
#'                                  zipfile = system.file("testdata/SwashProjects.zip",
#'                                              package = "pfm"))
#' H_sw_R1_stream$get_events(c(2, 10))
#' H_sw_R1_stream$moving_windows(c(7, 21))
#' print(H_sw_R1_stream)
#' @keywords data
TOXSWA_cwa <- R6Class("TOXSWA_cwa",
  public = list(
    filename = NULL,
    basedir = NULL,
    zipfile = NULL,
    segment = NULL,
    substance = NULL,
    cwas = NULL,
    windows = NULL,
    events = list(),

    #' @description
    #' Create a TOXSWA_cwa object from a file
    #' @param filename The filename
    #' @param basedir The directory to look in
    #' @param zipfile Optional path to a zipfile holding the file
    #' @param segment Either "last" or the number of the segment for which to read the data
    #' @param substance The TOXSWA substance name (for TOXSWA 4 or higher)
    #' @param total Should total concentrations be read in? If FALSE, free concentrations are read
    initialize = function(filename, basedir, zipfile = NULL,
                          segment = "last", substance = "parent", total = FALSE) {
      self$filename <- filename
      self$basedir <- basedir
      self$zipfile <- zipfile
      if (!is.null(zipfile)) {
        try(file_connection <- unz(zipfile, paste0(basedir, "/", filename)))
      } else {
        try(file_connection <- file(file.path(basedir, filename), "rt"))
      }


      if (grepl(".cwa$", filename)) {
        # cwa file from FOCUS TOXSWA 3 (TOXSWA 2.x.y)
        cwa_all_segments <- try(
          read.table(file_connection,
          sep = "", skip = 40,
          encoding = "UTF-8",
          colClasses = c("character", "numeric",
                         "integer", rep("numeric", 5)),
          col.names = c("datetime", "t", "segment",
                        "xcd", "cwa_tot", "cwa", "Xss", "Xmp")))

        if (is.null(zipfile)) close(file_connection) # only needed for files

        if (!inherits(cwa_all_segments, "try-error")) {
          available_segments = 1:max(cwa_all_segments$segment)
          if (segment == "last") segment = max(available_segments)
          if (!segment %in% available_segments) stop("Invalid segment specified")
          self$segment <- segment
          cwa <- subset(cwa_all_segments, segment == self$segment,
                        c("datetime", "t", "segment", "cwa", "cwa_tot"))
          lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C")
          cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%H:%M", tz = "UTC")
          Sys.setlocale("LC_TIME", lct)
          startyear = format(cwa$datetime[1], "%Y")
          firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d",
                               tz = "UTC")
          cwa$t_firstjan <- as.numeric(difftime(cwa$datetime,
                                                      firstjan, units = "days"))

          t_max = cwa[which.max(cwa$cwa), "t"]
          cwa$t_rel_to_max = cwa$t - t_max
          cwa$cwa_mug_per_L <- cwa$cwa * 1000
          cwa$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000
          self$cwas <- cwa[c("datetime", "t", "t_firstjan",
                                          "t_rel_to_max",
                                          "cwa_mug_per_L",
                                          "cwa_tot_mug_per_L")]
        } else {
          stop("Could not read ", filename)
        }
      } else {
        # out file from FOCUS TOXSWA 4 or higher
        outfile <- try(readLines(file_connection))

        focus_toxswa_version <- gsub(".*: ", "", outfile[4])

        if (substr(focus_toxswa_version, 1, 1) == "3") {
          cwastring = "ConLiqWatLayCur"
        } else {
          cwastring = "ConLiqWatLay"
        }

        close(file_connection) # only needed for files

        if (inherits(outfile, "try-error")) {
          stop("Could not read ", filename)
        } else {
          # Get the substance name(s)
          sub_lines <- grep(paste0(".*0.000.*", cwastring, "_"), outfile[1:50], value = TRUE)
          substances <- gsub(paste0(".*", cwastring, "_(.*?) +[0-9].*"), "\\1", sub_lines)
          if (!substance %in% c("parent", substances)) {
            stop("No data for substance ", substance, " present in the .out file.")
          }

          # Generate field name for the concentrations at end of hour for the
          # substance of interest
          if (substance == "parent") {
            cwa_string = paste0(cwastring, "_", substances[1])
          } else {
            cwa_string = paste0(cwastring, "_", substance)
          }

          cwa_lines <- grep(cwa_string, outfile, value = TRUE)

          cwa_all_segments <- read_fwf(paste(cwa_lines, collapse = "\n"),
                                       fwf_empty(paste(tail(cwa_lines), collapse = "\n")))


          available_segments = 1:(ncol(cwa_all_segments) - 3)
          if (segment == "last") segment = max(available_segments)
          if (!segment %in% available_segments) stop("Invalid segment specified")
          self$segment <- segment
          cwa <- data.frame(
            datetime = as.character(cwa_all_segments$X2),
            t = cwa_all_segments$X1,
            cwa = cwa_all_segments[[3 + segment]],
            stringsAsFactors = FALSE
          )

          # Append time "-00h00" to datetime if there is no time (only 11 characters)
          # The fact that the time is missing at 00h00 was reported to Mark
          # Liedekerke, Wim Beltman, Paulien Adriaanse, and Chris Lythgo
          # on 14 December 2016
          cwa <- within(cwa,
            datetime <- ifelse(nchar(datetime) == 11,
              paste0(datetime, "-00h00"),
              datetime))

          if (total) {
            cwa_tot_lines <- outfile[grep("ConSysWatLay_", outfile)] # hourly total conc.
            cwa_tot_all_segments <- read.table(text = cwa_lines)
            cwa$cwa_tot = cwa_tot_all_segments[[3 + segment]]
          }
          lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C")
          cwa$datetime <- strptime(cwa$datetime, "%d-%b-%Y-%Hh%M", tz = "UTC")
          Sys.setlocale("LC_TIME", lct)

          startyear = format(cwa$datetime[1], "%Y")
          firstjan <- strptime(paste0(startyear, "-01-01"), "%Y-%m-%d",
                               tz = "UTC")
          cwa$t_firstjan <- as.numeric(difftime(cwa$datetime,
                                                firstjan, units = "days"))
          t_max = cwa[which.max(cwa$cwa), "t"]
          cwa$t_rel_to_max = cwa$t - t_max
          cwa$cwa_mug_per_L <- cwa$cwa * 1000
          self$cwas <- cwa[c("datetime", "t", "t_firstjan",
                                          "t_rel_to_max",
                                          "cwa_mug_per_L")]
          if (total) {
            self$cwas$cwa_tot_mug_per_L <- cwa$cwa_tot * 1000
          }
        }
      }
    },

    #' @description
    #' Add to the `windows` field described above.
    #' @param windows Window sizes in days
    #' @param total If TRUE, the total concentration including the amount adsorbed to
    #' suspended matter will be used.
    moving_windows = function(windows, total = FALSE) {
      window_names = paste(windows, "days")
      n = length(window_names)
      self$windows <- data.frame(window = window_names,
                                 max_TWAC = numeric(n),
                                 max_AUC_h = numeric(n),
                                 max_AUC_d = numeric(n))
      if (missing(windows)) {
        stop("You need to specify at least one moving window size in days")
      }
      cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")
      for (i in seq_along(windows)) {
        window_size = windows[i]
        filter_size = window_size * 24
        max_TWAC = max(filter(self$cwas[cwa_column], rep(1/filter_size, filter_size),
                            "convolution"), na.rm = TRUE)
        max_AUC_h = max_TWAC * filter_size
        max_AUC_d = max_TWAC * window_size

        self$windows[i, -1] = c(max_TWAC, max_AUC_h, max_AUC_d)
      }
      invisible(self)
    },

    #' @description
    #' Populate a datataframe with event information for the specified
    #' threshold value.  The resulting dataframe is stored in the `events`
    #' field of the object.
    #' @param thresholds Threshold values in µg/L.
    #' @param total If TRUE, the total concentration including the amount adsorbed to
    #' suspended matter will be used.
    get_events = function(thresholds, total = FALSE) {
      if (missing(thresholds)) {
        stop("You need to specify at least one threshold concentration in \u03bcg/L")
      }
      for (threshold in thresholds) {
        events = data.frame(t_start = numeric(), cwa_max = numeric(),
                            duration = numeric(), pre_interval = numeric(),
                            AUC_h = numeric(), AUC_d = numeric())
        cwa_column = ifelse(total, "cwa_tot_mug_per_L", "cwa_mug_per_L")

        event_end = 0
        event = FALSE
        event_max = 0
        event_nr = 0
        n_rows = nrow(self$cwas)
        for (i in 1:n_rows) {
          cwa_cur = self$cwas[i, cwa_column]
          if (event == FALSE) {
            if (cwa_cur > threshold) {
              event_start = self$cwas[i, "t"]
              pre_interval = event_start - event_end
              i_start = i
              event = TRUE
              event_max = cwa_cur
            }
          } else {
            if (cwa_cur > event_max) event_max = cwa_cur
            if (cwa_cur < threshold || i == n_rows) {
              event_nr = event_nr + 1
              i_end = i
              if (i == n_rows) i_end = i
              event_end = self$cwas[i_end, "t"]
              event_length = event_end - event_start
              event_cwas <- self$cwas[i_start:(i_end - 1), cwa_column]
              event_AUC_h = sum(event_cwas)
              event_AUC_d = event_AUC_h / 24
              events[event_nr, ] = c(event_start, event_max, event_length,
                                     pre_interval, event_AUC_h, event_AUC_d)
              event = FALSE
            }
          }
        }

        self$events[[as.character(threshold)]] <- events
      }
      invisible(self)
    },

    #' @description
    #' Print a `TOXSWA_cwa` object
    print = function() {
      cat("<TOXSWA_cwa> data from file", self$filename, "segment", self$segment, "\n")
      print(head(self$cwas))
      cat("Moving window analysis\n")
      print(self$windows)
      for (threshold in names(self$events)) {
        cat("Event statistics for threshold", threshold, "\n")
        if (nrow(self$events[[threshold]]) == 0) cat("No events found\n")
        else print(self$events[[threshold]])
      }
    }
  )
)
# vim: set ts=2 sw=2 expandtab: