1 |
#' @title An R6 class for chemical entities with associated data |
|
2 |
#' |
|
3 |
#' @description The class is initialised with an identifier. Chemical |
|
4 |
#' information is retrieved from the internet. Additionally, it can be |
|
5 |
#' generated using RDKit if RDKit and its python bindings are installed. |
|
6 |
#' |
|
7 |
#' @export |
|
8 |
#' @format An [R6Class] generator object |
|
9 |
#' @importFrom R6 R6Class |
|
10 |
#' @importFrom utils URLencode |
|
11 |
#' @importFrom webchem get_cid cid_compinfo |
|
12 |
#' @importFrom grImport PostScriptTrace readPicture |
|
13 |
#' @importFrom yaml yaml.load_file |
|
14 |
#' @importFrom rsvg rsvg_ps |
|
15 |
#' @param identifier Identifier to be stored in the object |
|
16 |
#' @param smiles Optional user provided SMILES code |
|
17 |
#' @param inchikey Optional user provided InChI Key |
|
18 |
#' @param pubchem Should an attempt be made to retrieve chemical |
|
19 |
#' information from PubChem via the webchem package? |
|
20 |
#' @param pubchem_from Possibility to select the argument |
|
21 |
#' that is used to query pubchem |
|
22 |
#' @param rdkit Should an attempt be made to retrieve chemical |
|
23 |
#' information from a local rdkit installation via python |
|
24 |
#' and the reticulate package? |
|
25 |
#' @param template An optional SMILES code to be used as template for RDKit |
|
26 |
#' @param chyaml Should we look for a identifier.yaml file in the working |
|
27 |
#' directory? |
|
28 |
#' @param T Temperature |
|
29 |
#' @param pH pH value |
|
30 |
#' @param source An acronym specifying the source of the information |
|
31 |
#' @param page The page from which the information was taken |
|
32 |
#' @param soils Names of the soils |
|
33 |
#' @param type The soil type |
|
34 |
#' @param pH_orig The pH stated in the study |
|
35 |
#' @param pH_medium The medium in which this pH was measured |
|
36 |
#' @param pH_H2O The pH extrapolated to pure water |
|
37 |
#' @param perc_OC The percentage of organic carbon in the soil |
|
38 |
#' @param pages The pages from which the information was taken |
|
39 |
#' @param remark A remark |
|
40 |
#' @param file The file to write to |
|
41 |
#' @param dir The directory to write the file to |
|
42 |
#' @examples |
|
43 |
#' caffeine <- chent$new("caffeine") |
|
44 |
#' print(caffeine) |
|
45 |
#' if (!is.null(caffeine$Picture)) { |
|
46 |
#' plot(caffeine) |
|
47 |
#' } |
|
48 |
#' oct <- chent$new("1-octanol", smiles = "CCCCCCCCO", pubchem = FALSE) |
|
49 |
#' print(oct) |
|
50 |
chent <- R6Class("chent", |
|
51 |
public = list( |
|
52 |
#' @field identifier (`character(1)`)\cr |
|
53 |
#' The identifier that was used to initiate the object, with attribute 'source' |
|
54 |
identifier = NULL, |
|
55 | ||
56 |
#' @field inchikey (`character(1)`)\cr |
|
57 |
#' InChI Key, with attribute 'source' |
|
58 |
inchikey = NULL, |
|
59 | ||
60 |
#' @field smiles (`character()`)\cr |
|
61 |
#' SMILES code(s), with attribute 'source' |
|
62 |
smiles = NULL, |
|
63 | ||
64 |
#' @field mw (`numeric(1)`)\cr |
|
65 |
#' Molecular weight, with attribute 'source' |
|
66 |
mw = NULL, |
|
67 | ||
68 |
#' @field pubchem (`list()`)\cr |
|
69 |
#' List of information retrieved from PubChem |
|
70 |
pubchem = NULL, |
|
71 | ||
72 |
#' @field rdkit |
|
73 |
#' List of information obtained with RDKit |
|
74 |
rdkit = NULL, |
|
75 | ||
76 |
#' @field mol <rdkit.Chem.rdchem.Mol> object |
|
77 |
mol = NULL, |
|
78 | ||
79 |
#' @field svg SVG code |
|
80 |
svg = NULL, |
|
81 | ||
82 |
#' @field Picture Graph as a [grImport::Picture-class] object obtained using the grImport package |
|
83 |
Picture = NULL, |
|
84 | ||
85 |
#' @field Pict_font_size Font size as extracted from the intermediate PostScript file |
|
86 |
Pict_font_size = NULL, |
|
87 | ||
88 |
#' @field pdf_height Height of the MediaBox in the pdf after cropping |
|
89 |
pdf_height = NULL, |
|
90 | ||
91 |
#' @field p0 Vapour pressure in Pa |
|
92 |
p0 = NULL, |
|
93 | ||
94 |
#' @field cwsat Water solubility in mg/L |
|
95 |
cwsat = NULL, |
|
96 | ||
97 |
#' @field PUF Plant uptake factor |
|
98 |
PUF = NULL, |
|
99 | ||
100 |
#' @field chyaml List of information obtained from a YAML file |
|
101 |
chyaml = NULL, |
|
102 | ||
103 |
#' @description |
|
104 |
#' Creates a new instance of this [R6][R6::R6Class] class. |
|
105 |
initialize = function(identifier, smiles = NULL, inchikey = NULL, |
|
106 |
pubchem = TRUE, pubchem_from = c('name', 'smiles', 'inchikey'), |
|
107 |
rdkit = TRUE, template = NULL, |
|
108 |
chyaml = FALSE) { |
|
109 | ||
110 | 2x |
self$identifier <- identifier |
111 | 2x |
names(self$identifier) <- make.names(identifier) |
112 | 2x |
pubchem_from = match.arg(pubchem_from) |
113 | ||
114 | 2x |
self$smiles <- c(user = smiles) |
115 | ||
116 | 2x |
if (pubchem) { |
117 | 1x |
if (pubchem_from == 'name') { |
118 | ! |
query = identifier |
119 |
} else { |
|
120 | 1x |
query = get(pubchem_from) |
121 |
} |
|
122 | 1x |
self$try_pubchem(query, from = pubchem_from) |
123 |
} |
|
124 | ||
125 | 2x |
if (rdkit) { |
126 | 1x |
if (rdkit_available) { |
127 | 1x |
self$get_rdkit(template = template) |
128 |
} else { |
|
129 | ! |
message("RDKit is not available") |
130 |
} |
|
131 |
} |
|
132 | ||
133 | 2x |
if (chyaml) { |
134 | ! |
self$get_chyaml() |
135 |
} |
|
136 | ||
137 |
# Define main identifiers as NA if still not available |
|
138 | 2x |
if (is.null(self$smiles)) { |
139 | ! |
self$smiles <- NA |
140 | ! |
attr(self$smiles, "source") <- "user" |
141 |
} |
|
142 | 2x |
if (is.null(self$inchikey)) { |
143 | 1x |
self$inchikey <- NA |
144 | 1x |
attr(self$inchikey, "source") <- "user" |
145 |
} |
|
146 | 2x |
if (is.null(self$mw)) { |
147 | 1x |
self$mw <- NA |
148 | 1x |
attr(self$mw, "source") <- "user" |
149 |
} |
|
150 | ||
151 | 2x |
invisible(self) |
152 |
}, |
|
153 | ||
154 |
#' @description |
|
155 |
#' Try to get chemical information from PubChem |
|
156 |
#' @param query Query string to be passed to [get_cid][webchem::get_cid] |
|
157 |
#' @param from Passed to [get_cid][webchem::get_cid] |
|
158 |
try_pubchem = function(query, from = 'name') { |
|
159 | 2x |
message("PubChem:") |
160 | 1x |
if (missing(query)) query <- self$identifier |
161 | 2x |
pubchem_result = webchem::get_cid(query, from = from, match = "first") |
162 | ||
163 | 2x |
if (is.na(pubchem_result[[1, "cid"]])) { |
164 | ! |
message("Query ", query, " did not give results at PubChem") |
165 |
} else { |
|
166 | 2x |
self$get_pubchem(pubchem_result[[1, "cid"]]) |
167 |
} |
|
168 |
}, |
|
169 | ||
170 |
#' @description |
|
171 |
#' Get chemical information from PubChem for a known PubChem CID |
|
172 |
#' @param pubchem_cid CID |
|
173 |
get_pubchem = function(pubchem_cid) { |
|
174 | 2x |
self$pubchem = as.list(webchem::pc_prop(pubchem_cid, from = "cid", |
175 | 2x |
properties = c("MolecularFormula", "MolecularWeight", |
176 | 2x |
"ConnectivitySMILES", "SMILES", |
177 | 2x |
"InChI", "InChIKey", "IUPACName", |
178 | 2x |
"XLogP", "TPSA", "Complexity", "Charge", |
179 | 2x |
"HBondDonorCount", "HBondAcceptorCount"))) |
180 | 2x |
self$pubchem$synonyms = webchem::pc_synonyms(pubchem_cid, from = "cid")[[1]] |
181 | ||
182 | 2x |
self$smiles["PubChem"] <- self$pubchem$SMILES |
183 | ||
184 | 2x |
if (self$pubchem$SMILES != self$pubchem$ConnectivitySMILES) { |
185 | ! |
self$smiles["PubChem_Connectivity"] <- self$pubchem$ConnectivitySMILES |
186 |
} |
|
187 | ||
188 | 2x |
self$mw = as.numeric(self$pubchem$MolecularWeight) |
189 | 2x |
attr(self$mw, "source") <- "pubchem" |
190 | ||
191 | 2x |
if (is.null(self$inchikey)) { |
192 | ! |
self$inchikey <- self$pubchem$InChIKey |
193 | ! |
attr(self$inchikey, "source") <- "pubchem" |
194 |
} else { |
|
195 | 2x |
if (is.na(self$inchikey)) { |
196 | 1x |
warning("Overwriting uninitialized InChIKey") |
197 | 1x |
self$inchikey <- self$pubchem$InChIKey |
198 | 1x |
attr(self$inchikey, "source") <- "pubchem" |
199 |
} else { |
|
200 | 1x |
if (length(self$inchikey) > 1) { |
201 | ! |
message("InChIKey ", self$inchikey, " retreived from ", |
202 | ! |
attr(self$inchikey, "source"), |
203 | ! |
" has length > 1, using PubChem InChIKey") |
204 | ! |
self$inchikey <- self$pubchem$InChIKey |
205 | ! |
attr(self$inchikey, "source") <- "pubchem" |
206 |
} else { |
|
207 | 1x |
if (self$pubchem$InChIKey != self$inchikey) { |
208 | ! |
message("InChiKey ", self$pubchem$InChIKey, " from PubChem record does not match\n", |
209 | ! |
"InChiKey ", self$inchikey, " retreived from ", |
210 | ! |
attr(self$inchikey, "source")) |
211 |
} else { |
|
212 | 1x |
attr(self$inchikey, "source") <- c(attr(self$inchikey, "source"), "pubchem") |
213 |
} |
|
214 |
} |
|
215 |
} |
|
216 |
} |
|
217 |
}, |
|
218 | ||
219 |
#' @description |
|
220 |
#' Get chemical information from RDKit if available |
|
221 |
get_rdkit = function(template = NULL) { |
|
222 |
|
|
223 | ! |
if (!rdkit_available) stop("RDKit is not available") |
224 | ! |
if (is.null(self$smiles)) stop("RDKit would need a SMILES code") |
225 |
|
|
226 | 2x |
available_smiles <- names(self$smiles) |
227 | 2x |
smiles_preference <- c("user", "PubChem", "PubChem_Connectivity") |
228 | 2x |
smiles_preferred_i <- min(match(available_smiles, smiles_preference)) |
229 | 2x |
smiles_preferred <- smiles_preference[smiles_preferred_i] |
230 | ||
231 | 2x |
message("Trying to get chemical information from RDKit using ", |
232 | 2x |
smiles_preferred, " SMILES\n", |
233 | 2x |
self$smiles[smiles_preferred]) |
234 | 2x |
self$rdkit <- list() |
235 | 2x |
self$mol <- rdkit_module$Chem$MolFromSmiles(self$smiles[1]) |
236 | 2x |
self$rdkit$mw <- rdkit_module$Chem$Descriptors$MolWt(self$mol) |
237 | 2x |
if (!is.na(self$mw)) { |
238 | 1x |
if (round(self$rdkit$mw, 1) != round(self$mw, 1)) { |
239 | ! |
message("RDKit mw is ", self$rdkit$mw) |
240 | ! |
message("mw is ", self$mw) |
241 |
} |
|
242 |
} else { |
|
243 | 1x |
self$mw <- self$rdkit$mw |
244 | 1x |
attr(self$mw, "source") <- "rdkit" |
245 |
} |
|
246 | ||
247 |
# Create an SVG representation |
|
248 | 2x |
rdkit_module$Chem$rdDepictor$Compute2DCoords(self$mol) |
249 | 2x |
if (!is.null(template)) { |
250 | ! |
rdkit_template <- rdkit_module$Chem$MolFromSmiles(template) |
251 | ! |
rdkit_module$Chem$rdDepictor$Compute2DCoords(template) |
252 | ! |
rdkit$Chem$AllChem$GenerateDepictionMatching2DStructure(self$mol, template) |
253 |
} |
|
254 | 2x |
d2d <- rdkit_module$Chem$Draw$rdMolDraw2D$MolDraw2DSVG(400L, 400L) |
255 | 2x |
d2d$DrawMolecule(self$mol) |
256 | 2x |
d2d$FinishDrawing() |
257 | 2x |
self$svg <- d2d$GetDrawingText() |
258 | 2x |
svgfile <- tempfile(fileext = ".svg") |
259 | 2x |
psfile <- tempfile(fileext = ".ps") |
260 | 2x |
writeLines(self$svg, svgfile) |
261 | 2x |
rsvg::rsvg_ps(svgfile, psfile) |
262 | ||
263 |
# Get size properties useful for plotting |
|
264 | 2x |
ps_font_line <- grep("Tm$", readLines(psfile), value = TRUE)[1] |
265 | 2x |
ps_font_size <- gsub(" .*$", "", ps_font_line) |
266 | 2x |
self$Pict_font_size = as.numeric(ps_font_size) |
267 | ||
268 |
# Read in to create Picture |
|
269 | 2x |
xmlfile <- tempfile(fileext = ".xml") |
270 | 2x |
PostScriptTrace(psfile, outfilename = xmlfile) |
271 | 2x |
unlink(paste0("capture", basename(psfile))) |
272 | 2x |
self$Picture <- readPicture(xmlfile) |
273 | 2x |
unlink(c(xmlfile, psfile, svgfile)) |
274 |
}, |
|
275 | ||
276 |
#' @description |
|
277 |
#' Obtain information from a YAML file |
|
278 |
#' @param repo Should the file be looked for in the current working |
|
279 |
#' directory, a local git repository under `~/git/chyaml`, or from |
|
280 |
#' the web (not implemented). |
|
281 |
#' @param chyaml The filename to be looked for |
|
282 |
get_chyaml = function(repo = c("wd", "local", "web"), |
|
283 |
chyaml = paste0(URLencode(self$identifier), ".yaml")) |
|
284 |
{ |
|
285 | ! |
repo = match.arg(repo) |
286 | ! |
paths = c( |
287 | ! |
wd = ".", |
288 | ! |
local = file.path("~", "git/chyaml")) |
289 | ||
290 | ! |
chyaml_handlers = list( |
291 | ! |
expr = function(x) NULL, # To avoid security risks from reading chyaml files |
292 | ! |
dataframe = function(x) |
293 | ! |
eval(parse(text = paste0("data.frame(", x, ", stringsAsFactors = FALSE)")))) |
294 | ||
295 | ! |
if (repo %in% c("wd", "local")) { |
296 | ! |
path = paths[repo] |
297 | ! |
full = file.path(path, chyaml) |
298 | ! |
if (!file.exists(full)) { |
299 | ! |
message("Did not find chyaml file ", full) |
300 |
} else { |
|
301 | ! |
if (is(try(self$chyaml <- yaml.load_file(chyaml, handlers = chyaml_handlers)), |
302 | ! |
"try-error")) { |
303 | ! |
message("Could not load ", full) |
304 |
} else { |
|
305 | ! |
message("Loaded ", full) |
306 |
} |
|
307 |
} |
|
308 |
} else { |
|
309 | ! |
message("web repositories not implemented") |
310 |
} |
|
311 |
}, |
|
312 | ||
313 |
#' @description |
|
314 |
#' Add a vapour pressure |
|
315 |
#' @param p0 The vapour pressure in Pa |
|
316 |
add_p0 = function(p0, T = NA, source = NA, page = NA, remark = "") { |
|
317 | ! |
self$p0 <- p0 |
318 | ! |
attr(self$p0, "T") <- T |
319 | ! |
attr(self$p0, "source") <- source |
320 | ! |
attr(self$p0, "page") <- page |
321 | ! |
attr(self$p0, "remark") <- remark |
322 |
}, |
|
323 | ||
324 |
#' @description |
|
325 |
#' Add a water solubility |
|
326 |
#' @param cwsat The water solubility in mg/L |
|
327 |
add_cwsat = function(cwsat, T = NA, pH = NA, |
|
328 |
source = NA, page = NA, remark = "") |
|
329 |
{ |
|
330 | ! |
self$cwsat <- cwsat |
331 | ! |
attr(self$cwsat, "T") <- T |
332 | ! |
attr(self$cwsat, "pH") <- pH |
333 | ! |
attr(self$cwsat, "source") <- source |
334 | ! |
attr(self$cwsat, "page") <- page |
335 | ! |
attr(self$cwsat, "remark") <- remark |
336 |
}, |
|
337 | ||
338 |
#' @description |
|
339 |
#' Add a plant uptake factor |
|
340 |
#' @param PUF The plant uptake factor, a number between 0 and 1 |
|
341 |
add_PUF = function(PUF = 0, |
|
342 |
source = "focus_generic_gw_2014", page = 41, |
|
343 |
remark = "Conservative default value") |
|
344 |
{ |
|
345 | ! |
self$PUF <- PUF |
346 | ! |
attr(self$PUF, "source") <- source |
347 | ! |
attr(self$PUF, "page") <- page |
348 | ! |
attr(self$PUF, "remark") <- remark |
349 |
}, |
|
350 | ||
351 |
#' @field TPs List of transformation products as chent objects |
|
352 |
TPs = list(), |
|
353 | ||
354 |
#' @description |
|
355 |
#' Add a transformation product to the internal list |
|
356 |
#' @param x A [chent] object, or an identifier to generate a [chent] object |
|
357 |
#' @param pubchem Should chemical information be obtained from PubChem? |
|
358 |
add_TP = function(x, smiles = NULL, pubchem = FALSE) { |
|
359 | ! |
if (inherits(x, "chent")) { |
360 | ! |
id <- names(x$identifier) |
361 | ! |
chent <- x |
362 |
} else { |
|
363 | ! |
id <- make.names(x) |
364 | ! |
chent <- chent$new(x, smiles = smiles, pubchem = pubchem) |
365 |
} |
|
366 | ! |
self$TPs[[id]] <- chent |
367 |
}, |
|
368 | ||
369 |
#' @field transformations Data frame of observed transformations |
|
370 |
transformations = data.frame(study_type = character(0), |
|
371 |
TP_identifier = character(0), |
|
372 |
max_occurrence = numeric(0), |
|
373 |
source = character(0), |
|
374 |
page = character(0), |
|
375 |
stringsAsFactors = FALSE), |
|
376 | ||
377 |
#' @description |
|
378 |
#' Add a line in the internal dataframe holding observed transformations |
|
379 |
#' @param study_type A characterisation of the study type |
|
380 |
#' @param TP_identifier An identifier of one of the transformation products |
|
381 |
#' in `self$TPs` |
|
382 |
#' @param max_occurrence The maximum observed occurrence of the |
|
383 |
#' transformation product, expressed as a fraction of the amount that would |
|
384 |
#' result from stochiometric transformation |
|
385 |
add_transformation = function(study_type, TP_identifier, max_occurrence, |
|
386 |
remark = "", source = NA, pages = NA) |
|
387 |
{ |
|
388 | ! |
TP_name = make.names(TP_identifier) |
389 | ! |
if (!inherits(self$TPs[[TP_name]], "chent")) { |
390 | ! |
stop(paste("Please add the TP", TP_identifier, "first using chent$add_TP()")) |
391 |
} |
|
392 | ! |
TP_chent <- self$TPs[TP_name] |
393 | ! |
if (is.numeric(pages)) pages <- paste(pages, collapse = ", ") |
394 | ! |
cn <- colnames(self$transformations) |
395 | ! |
self$transformations <- rbind(self$transformations, |
396 | ! |
data.frame(study_type = study_type, |
397 | ! |
TP_identifier = TP_identifier, |
398 | ! |
max_occurrence = max_occurrence, |
399 | ! |
remark = remark, |
400 | ! |
source = source, |
401 | ! |
page = page, |
402 | ! |
stringsAsFactors = FALSE)) |
403 |
}, |
|
404 | ||
405 |
#' @field soil_degradation Dataframe of modelling DT50 values |
|
406 |
soil_degradation = NULL, |
|
407 | ||
408 |
#' @description |
|
409 |
#' Add a line in the internal dataframe holding modelling DT50 values |
|
410 |
#' @param DT50_mod The modelling DT50 in the sense of regulatory pesticide |
|
411 |
#' fate modelling |
|
412 |
#' @param DT50_mod_ref The normalised modelling DT50 in the sense of |
|
413 |
#' regulatory pesticide fate modelling |
|
414 |
#' @param country The country (mainly for field studies) |
|
415 |
#' @param temperature The temperature during the study in degrees Celsius |
|
416 |
#' @param moisture The moisture during the study |
|
417 |
#' @param category Is it a laboratory ('lab') or field study ('field') |
|
418 |
#' @param formulation Name of the formulation applied, if it was not |
|
419 |
#' the technical active ingredient |
|
420 |
#' @param model The degradation model used for deriving `DT50_mod` |
|
421 |
#' @param chi2 The relative error as defined in FOCUS kinetics |
|
422 |
add_soil_degradation = function(soils, DT50_mod, DT50_mod_ref, |
|
423 |
type = NA, country = NA, |
|
424 |
pH_orig = NA, pH_medium = NA, pH_H2O = NA, |
|
425 |
perc_OC = NA, |
|
426 |
temperature = NA, moisture = NA, |
|
427 |
category = "lab", formulation = NA, |
|
428 |
model = NA, chi2 = NA, |
|
429 |
remark = "", source, page = NA) |
|
430 |
{ |
|
431 | ! |
new_soil_degradation = data.frame( |
432 | ! |
soil = soils, |
433 | ! |
DT50_mod = DT50_mod, |
434 | ! |
DT50_mod_ref = DT50_mod_ref, |
435 | ! |
type = type, |
436 | ! |
country = country, |
437 | ! |
pH_orig = pH_orig, |
438 | ! |
pH_medium = pH_medium, |
439 | ! |
pH_H2O = pH_H2O, |
440 | ! |
perc_OC = perc_OC, |
441 | ! |
temperature = temperature, |
442 | ! |
moisture = moisture, |
443 | ! |
category = category, |
444 | ! |
formulation = formulation, |
445 | ! |
model = model, |
446 | ! |
chi2 = chi2, |
447 | ! |
remark = remark, |
448 | ! |
source = source, |
449 | ! |
page = page, |
450 | ! |
stringsAsFactors = FALSE) |
451 | ! |
if (is.null(self$soil_degradation)) { |
452 | ! |
self$soil_degradation <- new_soil_degradation |
453 |
} else { |
|
454 | ! |
self$soil_degradation <- rbind(self$soil_degradation, new_soil_degradation) |
455 |
} |
|
456 |
}, |
|
457 | ||
458 |
#' @field soil_ff Dataframe of formation fractions |
|
459 |
soil_ff = NULL, |
|
460 | ||
461 |
#' @description |
|
462 |
#' Add one or more formation fractions for degradation in soil |
|
463 |
#' @param target The identifier(s) of the transformation product |
|
464 |
#' @param soils The soil name(s) in which the transformation was observed |
|
465 |
#' @param ff The formation fraction(s) |
|
466 |
add_soil_ff = function(target, soils, ff = 1, |
|
467 |
remark = "", source, page = NA) |
|
468 |
{ |
|
469 | ! |
new_soil_ff = data.frame( |
470 | ! |
target = target, |
471 | ! |
target = target, |
472 | ! |
soil = soils, |
473 | ! |
ff = ff, |
474 | ! |
remark = remark, |
475 | ! |
source = source, |
476 | ! |
page = page, |
477 | ! |
stringsAsFactors = FALSE) |
478 | ! |
if (is.null(self$soil_ff)) { |
479 | ! |
self$soil_ff <- new_soil_ff |
480 |
} else { |
|
481 | ! |
self$soil_ff <- rbind(self$soil_ff, new_soil_ff) |
482 |
} |
|
483 |
}, |
|
484 | ||
485 |
#' @field soil_sorption Dataframe of soil sorption data |
|
486 |
soil_sorption = NULL, |
|
487 | ||
488 |
#' @description |
|
489 |
#' Add soil sorption data |
|
490 |
#' @param Kf The sorption constant in L/kg, either linear (then `N` is 1) |
|
491 |
#' or according to Freundlich |
|
492 |
#' @param Kfoc The constant from above, normalised to soil organic carbon |
|
493 |
#' @param N The Freundlich exponent |
|
494 |
#' @param perc_clay The percentage of clay in the soil |
|
495 |
#' @param CEC The cation exchange capacity |
|
496 |
add_soil_sorption = function(soils, |
|
497 |
Kf, Kfoc, N, |
|
498 |
type = NA, pH_orig = NA, pH_medium = NA, |
|
499 |
pH_H2O = NA, |
|
500 |
perc_OC = NA, perc_clay = NA, CEC = NA, |
|
501 |
remark = "", source, page = NA) |
|
502 |
{ |
|
503 | ! |
new_soil_sorption = data.frame( |
504 | ! |
soils = soils, |
505 | ! |
Kf = Kf, Kfoc = Kfoc, N = N, |
506 | ! |
type = type, |
507 | ! |
pH_orig = pH_orig, |
508 | ! |
pH_medium = pH_medium, |
509 | ! |
pH_H2O = pH_H2O, |
510 | ! |
perc_OC = perc_OC, perc_clay = perc_clay, CEC = CEC, |
511 | ! |
remark = remark, |
512 | ! |
source = source, |
513 | ! |
page = page, |
514 | ! |
stringsAsFactors = FALSE) |
515 | ! |
if (is.null(self$soil_sorption)) { |
516 | ! |
self$soil_sorption <- new_soil_sorption |
517 |
} else { |
|
518 | ! |
self$soil_sorption <- rbind(self$soil_sorption, new_soil_sorption) |
519 |
} |
|
520 |
}, |
|
521 | ||
522 |
#' @description |
|
523 |
#' Write a PDF image of the structure |
|
524 |
pdf = function(file = paste0(self$identifier, ".pdf"), |
|
525 |
dir = "structures/pdf", template = NULL) { |
|
526 | ! |
if (!dir.exists(dir)) { |
527 | ! |
message("Directory '", dir, "' does not exist") |
528 | ! |
message("Trying to create directory '", dir, "'") |
529 | ! |
dir.create(dir, recursive = TRUE) |
530 |
} |
|
531 | ! |
path = file.path(dir, file) |
532 | ! |
message("Creating file '", path, "'") |
533 | ! |
pdf(path) |
534 | ! |
plot(self) |
535 | ! |
dev.off() |
536 | ! |
message("Cropping file '", path, "' using pdfcrop") |
537 | ! |
bash_path <- shQuote(path) |
538 | ! |
system(paste("pdfcrop --margin 10", bash_path, bash_path, "> /dev/null")) |
539 | ||
540 |
# Get the height of the MediaBox |
|
541 | ! |
head <- readLines(path, n = 20, skipNul = TRUE) |
542 | ! |
m_line <- suppressWarnings(grep("MediaBox", head, value = TRUE)) |
543 | ! |
self$pdf_height <- as.numeric(gsub("/MediaBox \\[.* (.*)\\]", "\\1", m_line)) |
544 |
}, |
|
545 | ||
546 |
#' @description |
|
547 |
#' Write a PNG image of the structure |
|
548 |
#' @param antialias Passed to [png][grDevices::png] |
|
549 |
png = function(file = paste0(self$identifier, ".png"), |
|
550 |
dir = "structures/png", antialias = 'gray') |
|
551 |
{ |
|
552 | ! |
if (!dir.exists(dir)) { |
553 | ! |
message("Directory '", dir, "' does not exist") |
554 | ! |
message("Trying to create directory '", dir, "'") |
555 | ! |
dir.create(dir, recursive = TRUE) |
556 |
} |
|
557 | ! |
path = file.path(dir, file) |
558 | ! |
message("Creating file '", path, "'") |
559 | ! |
png(path, antialias = antialias) |
560 | ! |
plot(self) |
561 | ! |
dev.off() |
562 |
}, |
|
563 | ||
564 |
#' @description |
|
565 |
#' Write an EMF image of the structure using [emf][devEMF::emf] |
|
566 |
emf = function(file = paste0(self$identifier, ".emf"), |
|
567 |
dir = "structures/emf") |
|
568 |
{ |
|
569 | ! |
if (!requireNamespace("devEMF")) { |
570 | ! |
stop("You need to have the devEMF package installed for this function") |
571 |
} |
|
572 | ! |
if (!dir.exists(dir)) { |
573 | ! |
message("Directory '", dir, "' does not exist") |
574 | ! |
message("Trying to create directory '", dir, "'") |
575 | ! |
dir.create(dir, recursive = TRUE) |
576 |
} |
|
577 | ! |
path = file.path(dir, file) |
578 | ! |
message("Creating file '", path, "'") |
579 | ! |
devEMF::emf(path) |
580 | ! |
plot(self) |
581 | ! |
dev.off() |
582 |
} |
|
583 |
) |
|
584 |
) |
|
585 | ||
586 |
#' Printing method for chent objects |
|
587 |
#' |
|
588 |
#' @param x The chent object to be printed |
|
589 |
#' @param ... Further arguments for compatibility with the S3 method |
|
590 |
#' @importFrom utils head |
|
591 |
#' @export |
|
592 |
print.chent = function(x, ...) { |
|
593 | ! |
cat("<chent>\n") |
594 | ! |
cat("Identifier $identifier", x$identifier, "\n") |
595 | ! |
cat ("InChI Key $inchikey", x$inchikey, "\n") |
596 | ! |
cat ("SMILES string $smiles:\n") |
597 | ! |
print(x$smiles) |
598 | ! |
if (!is.null(x$mw)) cat ("Molecular weight $mw:", round(x$mw, 1), "\n") |
599 | ! |
if (!is.null(x$pubchem$synonyms)) { |
600 | ! |
cat ("PubChem synonyms (up to 10):\n") |
601 | ! |
print(head(x$pubchem$synonyms, n = 10L)) |
602 |
} |
|
603 |
} |
|
604 | ||
605 |
#' Draw SVG graph from a chent object using RDKit |
|
606 |
#' |
|
607 |
#' @param x The chent object to be plotted |
|
608 |
#' @param width The desired width in pixels |
|
609 |
#' @param height The desired height in pixels |
|
610 |
#' @param filename The filename |
|
611 |
#' @param subdir The path to which the file should be written |
|
612 |
#' @export |
|
613 |
draw_svg.chent = function(x, width = 300, height = 150, |
|
614 |
filename = paste0(names(x$identifier), ".svg"), |
|
615 |
subdir = "svg") { |
|
616 | ! |
if (!rdkit_available) { |
617 | ! |
stop("RDkit is not available via reticulate") |
618 |
} else { |
|
619 | ! |
if (!dir.exists(subdir)) dir.create(subdir) |
620 | ! |
mol <- rdkit_module$Chem$MolFromSmiles(x$smiles) |
621 | ||
622 | ! |
rdkit_module$Chem$Draw$MolToFile(mol, file.path(subdir, filename), |
623 | ! |
size = c(as.integer(width), as.integer(height))) |
624 |
} |
|
625 |
} |
|
626 | ||
627 |
#' Plot method for chent objects |
|
628 |
#' |
|
629 |
#' @importFrom grImport grid.picture |
|
630 |
#' @param x The chent object to be plotted |
|
631 |
#' @param ... Further arguments passed to [grImport::grid.picture] |
|
632 |
#' @export |
|
633 |
#' @examples |
|
634 |
#' caffeine <- chent$new("caffeine") |
|
635 |
#' print(caffeine) |
|
636 |
#' if (!is.null(caffeine$Picture)) { |
|
637 |
#' plot(caffeine) |
|
638 |
#' } |
|
639 |
plot.chent = function(x, ...) { |
|
640 | ! |
if (is.null(x$Picture)) stop("No Picture object in chent, was RDKit available during creation?") |
641 | ! |
grid.picture(x$Picture) |
642 |
} |
|
643 | ||
644 |
#' @title An R6 class for pesticidal active ingredients and associated data |
|
645 |
#' |
|
646 |
#' @description This class is derived from [chent]. It makes it easy |
|
647 |
#' to create a [chent] from the ISO common name of a pesticide active |
|
648 |
#' ingredient, and additionally stores the ISO name as well as |
|
649 |
#' the complete result of querying the BCPC compendium using |
|
650 |
#' [bcpc_query][webchem::bcpc_query]. |
|
651 |
#' |
|
652 |
#' @export |
|
653 |
#' @format An [R6::R6Class] generator object |
|
654 |
#' @examples |
|
655 |
#' # On Travis, we get a certificate validation error, |
|
656 |
#' # likely because the system (xenial) is so old, |
|
657 |
#' # therefore don't run this example on Travis |
|
658 |
#' if (Sys.getenv("TRAVIS") == "") { |
|
659 |
#' |
|
660 |
#' atr <- pai$new("atrazine") |
|
661 |
#' print(atr) |
|
662 |
#' if (!is.null(atr$Picture)) { |
|
663 |
#' plot(atr) |
|
664 |
#' } |
|
665 |
#' |
|
666 |
#' } |
|
667 |
pai <- R6Class("pai", |
|
668 |
inherit = chent, |
|
669 |
public = list( |
|
670 | ||
671 |
#' @field iso ISO common name of the active ingredient according to ISO 1750 |
|
672 |
iso = NULL, |
|
673 | ||
674 |
#' @field bcpc Information retrieved from the BCPC compendium available online |
|
675 |
#' at <pesticidecompendium.bcpc.org> |
|
676 |
bcpc = NULL, |
|
677 | ||
678 |
#' @description |
|
679 |
#' Create a new pai object |
|
680 |
#' @param iso The ISO common name to be used in the query of the |
|
681 |
#' BCPC compendium |
|
682 |
#' @param identifier Alternative identifier used for querying pubchem |
|
683 |
#' @param smiles Optional user provided SMILES code |
|
684 |
#' @param inchikey Optional user provided InChI Key |
|
685 |
#' @param bcpc Should the BCPC compendium be queried? |
|
686 |
#' @param pubchem Should an attempt be made to retrieve chemical |
|
687 |
#' information from PubChem via the webchem package? |
|
688 |
#' @param pubchem_from Possibility to select the argument |
|
689 |
#' that is used to query pubchem |
|
690 |
#' @param rdkit Should an attempt be made to retrieve chemical |
|
691 |
#' information from a local rdkit installation via python |
|
692 |
#' and the reticulate package? |
|
693 |
#' @param template An optional SMILES code to be used as template for RDKit |
|
694 |
#' @param chyaml Should we look for a identifier.yaml file in the working |
|
695 |
initialize = function(iso, identifier = iso, |
|
696 |
smiles = NULL, inchikey = NULL, bcpc = TRUE, |
|
697 |
pubchem = TRUE, pubchem_from = 'auto', |
|
698 |
rdkit = TRUE, template = NULL, |
|
699 |
chyaml = FALSE) |
|
700 |
{ |
|
701 | ||
702 | 1x |
if (!is.null(inchikey)) { |
703 | ! |
self$inchikey = inchikey |
704 | ! |
attr(self$inchikey, "source") <- "user" |
705 |
} |
|
706 | ||
707 | 1x |
if (!missing(iso) & bcpc) { |
708 | 1x |
message("BCPC:") |
709 | 1x |
bcpc_result = webchem::bcpc_query(identifier, from = "name") |
710 | ||
711 |
# Use first element of list, as we passed a query of length one |
|
712 | 1x |
if (is.na(bcpc_result[[1]][1])) { |
713 | ! |
message("Common name ", identifier, " is not known at the BCPC compendium, trying PubChem") |
714 |
} else { |
|
715 | 1x |
self$bcpc = bcpc_result[[1]] |
716 | 1x |
self$iso = self$bcpc$cname |
717 | 1x |
attr(self$iso, "source") <- "bcpc" |
718 | 1x |
attr(self$iso, "status") <- self$bcpc$status |
719 | 1x |
bcpc_ik = self$bcpc$inchikey |
720 | 1x |
if (length(bcpc_ik) == 1 && !is.na(bcpc_ik)) { |
721 | 1x |
if (is.null(self$inchikey)) { |
722 | 1x |
self$inchikey = substr(self$bcpc$inchikey, 1, 27) |
723 | 1x |
attr(self$inchikey, "source") <- "bcpc" |
724 |
} else { |
|
725 | ! |
if (bcpc_ik == self$inchikey) { |
726 | ! |
attr(self$inchikey, "source") = c(attr(self$inchikey, "source"), "bcpc") |
727 |
} else { |
|
728 | ! |
warning("InChIKey ", self$inchikey, " differs from ", bcpc_ik, " obtained from bcpc.org") |
729 |
} |
|
730 |
} |
|
731 |
} |
|
732 |
} |
|
733 |
} |
|
734 | ||
735 |
# Set pubchem_from if not specified |
|
736 | 1x |
if (pubchem_from == 'auto') { |
737 | 1x |
pubchem_from = 'name' |
738 | 1x |
if (!is.null(self$inchikey)) { |
739 | 1x |
pubchem_from = 'inchikey' |
740 |
} |
|
741 |
} |
|
742 | ||
743 | 1x |
super$initialize(identifier = identifier, |
744 | 1x |
smiles = smiles, inchikey = self$inchikey, |
745 | 1x |
pubchem = pubchem, pubchem_from = pubchem_from, |
746 | 1x |
rdkit = rdkit, template = template, chyaml = chyaml) |
747 | ||
748 | 1x |
invisible(self) |
749 |
} |
|
750 |
) |
|
751 |
) |
|
752 | ||
753 |
#' Printing method for pai objects (pesticidal active ingredients) |
|
754 |
#' |
|
755 |
#' @param x The chent object to be printed |
|
756 |
#' @param ... Further arguments for compatibility with the S3 method |
|
757 |
#' @export |
|
758 |
print.pai = function(x, ...) { |
|
759 | ! |
cat("<pai> with ISO common name $iso", x$iso, "\n") |
760 | ! |
print.chent(x) |
761 | ! |
if (length(x$TPs) > 0) { |
762 | ! |
cat("\nTransformation products:\n") |
763 | ! |
print(x$TPs) |
764 |
} |
|
765 | ! |
if (nrow(x$transformations) > 0) { |
766 | ! |
cat("\nTransformations:\n") |
767 | ! |
print(x$transformations) |
768 |
} |
|
769 |
} |
|
770 | ||
771 |
#' @title R6 class for a plant protection product with at least one active ingredient |
|
772 |
#' |
|
773 |
#' @description Contains basic information about the active ingredients in the |
|
774 |
#' product |
|
775 |
#' |
|
776 |
#' @export |
|
777 |
#' @format An [R6::R6Class] generator object. |
|
778 |
ppp <- R6Class("ppp", |
|
779 |
public = list( |
|
780 | ||
781 |
#' @field name The name of the product |
|
782 |
name = NULL, |
|
783 | ||
784 |
#' @field ais A list of active ingredients |
|
785 |
ais = list(), |
|
786 | ||
787 |
#' @field concentrations The concentration of the ais |
|
788 |
concentrations = NULL, |
|
789 | ||
790 |
#' @field concentration_units Defaults to g/L |
|
791 |
concentration_units = NULL, |
|
792 | ||
793 |
#' @field density The density of the product |
|
794 |
density = NULL, |
|
795 | ||
796 |
#' @field density_units Defaults to g/L |
|
797 |
density_units = "g/L", |
|
798 | ||
799 |
#' @description |
|
800 |
#' Creates a new instance of this [R6][R6::R6Class] class. |
|
801 |
#' @param name The name of the product |
|
802 |
#' @param ... Identifiers of the active ingredients |
|
803 |
#' @param concentrations Concentrations of the active ingredients |
|
804 |
#' @param concentration_units Defaults to g/L |
|
805 |
#' @param density The density |
|
806 |
#' @param density_units Defaults to g/L |
|
807 |
initialize = function(name, ..., concentrations, concentration_units = "g/L", |
|
808 |
density = 1000, density_units = "g/L") |
|
809 |
{ |
|
810 | ! |
self$name <- name |
811 | ! |
self$ais <- list(...) |
812 | ! |
self$concentrations <- concentrations |
813 | ! |
self$density <- density |
814 | ! |
self$density_units <- density_units |
815 | ! |
names(self$concentrations) <- names(self$ais) |
816 | ! |
self$concentration_units <- concentration_units |
817 |
} |
|
818 |
) |
|
819 |
) |
|
820 | ||
821 |
#' Printing method for ppp objects (plant protection products) |
|
822 |
#' |
|
823 |
#' @param x The chent object to be printed |
|
824 |
#' @param ... Further arguments for compatibility with the S3 method |
|
825 |
#' @export |
|
826 |
print.ppp = function(x, ...) { |
|
827 | ! |
cat("<pp> named", x$name, "\n") |
828 |
} |
|
829 |
# vim: set ts=2 sw=2 expandtab: |
1 |
.onLoad = function(libname, pkgname) { |
|
2 | ! |
conf <- reticulate::py_discover_config("rdkit") |
3 | ! |
rdkit_available <- reticulate::py_module_available("rdkit") |
4 | ! |
rdkit_module <- try( |
5 | ! |
reticulate::import("rdkit"), |
6 | ! |
silent = TRUE) |
7 | ! |
assign('rdkit_available', rdkit_available, envir = topenv()) |
8 | ! |
assign('rdkit_module', rdkit_module, envir = topenv()) |
9 |
} |