# Copyright (C) 2016 Johannes Ranke
# Contact: jranke@uni-bremen.de
# This file is part of the R package chents
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>
#' An R6 class for chemical entities with associated data
#'
#' The class is initialised with an identifier. Chemical information is retrieved from
#' the internet. Additionally, it can be generated using RDKit if RDKit and its
#' python bindings are installed and configured for use with PythonInR.
#'
#' @docType class
#' @export
#' @format An \code{\link{R6Class}} generator object
#' @importFrom R6 R6Class
#' @importFrom webchem get_cid cid_compinfo
#' @importFrom grImport PostScriptTrace readPicture
#' @importFrom yaml yaml.load_file
#' @field identifier The identifier that was used to initiate the object, with attribute 'source'
#' @field inchikey InChI Key, with attribute 'source'
#' @field smiles SMILES code, with attribute 'source'
#' @field mw Molecular weight, with attribute 'source'
#' @field pubchem List of information retrieved from PubChem
#' @field rdkit List of information obtained with RDKit, if installed and
#' configured for use with PythonInR
#' @field Picture Graph as a \code{\link{picture}} object obtained using grImport
#' @field chyaml List of information obtained from a YAML file
#' @field degradation List of degradation endpoints
#' @field TPs Named list of chent objects added with \code{add_TP()}
#' @field transformations Data frame with one row per call to
#' \code{add_transformation()}
#' @field soil_degradation_endpoints Data frame with one row per call to
#' \code{add_soil_degradation_endpoints()}
#' @field ff Data frame with the columns from, to, ff, comment and pages,
#' with one row per call to \code{add_ff()}
#' @example inst/examples/octanol.R
#' @example inst/examples/caffeine.R
#' @keywords data
chent <- R6Class("chent",
  public = list(
    identifier = NULL,
    inchikey = NULL,
    smiles = NULL,
    mw = NULL,
    pubchem = NULL,
    rdkit = NULL,
    Picture = NULL,
    chyaml = NULL,
    degradation = NULL,

    # Create the object and, depending on the switches, query PubChem, run
    # RDKit on the first available SMILES code and load a chyaml file.
    #   identifier  Identifier stored in the object and used for the queries
    #   smiles      Optional user supplied SMILES code
    #   rdkit, pubchem, chyaml  Logical switches for the information sources
    # Stops if RDKit is requested, PythonInR is available and no SMILES code
    # is known. Returns the object invisibly.
    initialize = function(identifier, smiles = NULL,
                          rdkit = TRUE, pubchem = TRUE,
                          chyaml = TRUE) {
      self$identifier <- identifier
      names(self$identifier) <- make.names(identifier)
      # c(user = NULL) is NULL, so no SMILES entry is created without input
      self$smiles <- c(user = smiles)
      if (pubchem) {
        self$try_pubchem(identifier)
      }
      if (rdkit) {
        if (requireNamespace("PythonInR")) {
          if (is.null(self$smiles)) {
            stop("RDKit needs a SMILES code")
          } else {
            message("Trying to get chemical information from RDKit using ",
                    names(self$smiles)[1], " SMILES\n",
                    self$smiles[1])
            self$get_rdkit()
            # get_rdkit() leaves self$rdkit untouched when RDKit cannot be
            # imported. Only overwrite the molecular weight if RDKit really
            # delivered one, as attributes cannot be set on NULL.
            if (!is.null(self$rdkit$mw)) {
              self$mw <- self$rdkit$mw
              attr(self$mw, "source") <- "rdkit"
            }
          }
        }
      }
      if (chyaml) {
        self$get_chyaml()
      }
      invisible(self)
    },

    # Look up PubChem CIDs for the identifier (defaults to self$identifier)
    # and load the record of the first hit via get_pubchem().
    try_pubchem = function(identifier) {
      message("PubChem:")
      if (missing(identifier)) identifier <- self$identifier
      pubchem_cids <- webchem::get_cid(identifier)
      if (is.na(pubchem_cids[1])) {
        message("Query ", identifier, " did not give results at PubChem")
      } else {
        message("Found ", length(pubchem_cids), " entries in PubChem, using the first one.")
        self$get_pubchem(pubchem_cids[1])
      }
    },

    # Retrieve properties and synonyms for a PubChem CID and store SMILES
    # codes, molecular weight and InChIKey in the object. An InChIKey that is
    # already present is kept; a mismatch is only reported.
    get_pubchem = function(pubchem_cid) {
      self$pubchem <- as.list(webchem::pc_prop(pubchem_cid, from = "cid"))
      self$pubchem$synonyms <- webchem::pc_synonyms(pubchem_cid, from = "cid")[[1]]
      self$smiles["PubChem_Canonical"] <- self$pubchem$CanonicalSMILES
      if (self$pubchem$IsomericSMILES != self$pubchem$CanonicalSMILES) {
        self$smiles["PubChem_Isomeric"] <- self$pubchem$IsomericSMILES
      }
      self$mw <- as.numeric(self$pubchem$MolecularWeight)
      attr(self$mw, "source") <- "pubchem"
      if (is.null(self$inchikey)) {
        self$inchikey <- self$pubchem$InChIKey
        attr(self$inchikey, "source") <- "pubchem"
      } else {
        if (self$pubchem$InChIKey != self$inchikey) {
          message("InChiKey ", self$pubchem$InChIKey, " from PubChem record does not match\n",
                  "InChiKey ", self$inchikey, " retreived from ",
                  attr(self$inchikey, "source"))
        }
      }
    },

    # Use RDKit via PythonInR to calculate the molecular weight from the
    # first SMILES code and to create a grImport Picture of the structure.
    # Stops if PythonInR cannot be loaded; only emits a message if RDKit
    # cannot be imported into the Python session.
    get_rdkit = function() {
      if (!requireNamespace("PythonInR")) {
        stop("PythonInR can not be loaded")
      }
      if (!PythonInR::pyIsConnected()) {
        PythonInR::pyConnect()
      }
      try_rdkit <- try(PythonInR::pyImport("Chem", from = "rdkit"))
      if (inherits(try_rdkit, "try-error")) {
        message("Could not import RDKit in Python session")
      } else {
        # Build a single quoted Python string literal. Backslashes occur in
        # stereo SMILES (e.g. C/C=C\N) and in Windows paths and would
        # otherwise be read as Python escape sequences.
        py_quote <- function(x) {
          x <- gsub("\\", "\\\\", x, fixed = TRUE)
          x <- gsub("'", "\\'", x, fixed = TRUE)
          paste0("'", x, "'")
        }
        self$rdkit <- list()
        PythonInR::pyImport("Descriptors", from = "rdkit.Chem")
        PythonInR::pyExec(paste0("mol = Chem.MolFromSmiles(",
                                 py_quote(self$smiles[1]), ")"))
        self$rdkit$mw <- PythonInR::pyExecg("mw = Descriptors.MolWt(mol)", "mw")
        if (!is.null(self$mw)) {
          # Report a discrepancy to a molecular weight from another source
          if (round(self$rdkit$mw, 1) != round(self$mw, 1)) {
            message("RDKit mw is ", self$rdkit$mw)
            message("mw is ", self$mw)
          }
        }
        # Create a grImport Picture
        PythonInR::pyImport("Draw", from = "rdkit.Chem")
        psfile <- tempfile(fileext = ".ps")
        xmlfile <- tempfile(fileext = ".xml")
        # The intermediate files are not needed once the Picture is read
        on.exit(unlink(c(psfile, xmlfile)), add = TRUE)
        cmd <- paste0("Draw.MolToFile(mol, ", py_quote(psfile), ")")
        PythonInR::pyExec(cmd)
        PostScriptTrace(psfile, outfilename = xmlfile)
        # Remove the capture file left in the working directory
        unlink(paste0("capture", basename(psfile)))
        self$Picture <- readPicture(xmlfile)
      }
    },

    # Load a chyaml file into self$chyaml.
    #   repo    "wd" (working directory), "local" (~/git/chyaml) or "web"
    #           (not implemented)
    #   chyaml  File name, by default the URL encoded identifier plus ".yaml"
    # Problems are reported with message() and do not stop execution.
    get_chyaml = function(repo = c("wd", "local", "web"),
                          chyaml = paste0(utils::URLencode(self$identifier), ".yaml")) {
      repo <- match.arg(repo)
      paths <- c(wd = ".",
                 local = file.path("~", "git/chyaml"))
      chyaml_handlers <- list(
        expr = function(x) NULL, # To avoid security risks from reading chyaml files
        # SECURITY NOTE(review): this handler still evaluates text taken from
        # the YAML file as R code, so only trusted chyaml files must be loaded.
        dataframe = function(x) {
          eval(parse(text = paste0("data.frame(", x, ", stringsAsFactors = FALSE)")))
        }
      )
      if (repo %in% c("wd", "local")) {
        full <- file.path(paths[[repo]], chyaml)
        if (!file.exists(full)) {
          message("Did not find chyaml file ", full)
        } else {
          # Load the file whose existence was checked, not the bare file
          # name, so that repo = "local" reads from the local repository
          loaded <- try(yaml.load_file(full, handlers = chyaml_handlers))
          if (inherits(loaded, "try-error")) {
            message("Could not load ", full)
          } else {
            self$chyaml <- loaded
            message("Loaded ", full)
          }
        }
      } else {
        message("web repositories not implemented")
      }
    },

    TPs = list(),

    # Add a transformation product. x is either a chent object or an
    # identifier from which a new chent object is generated, optionally using
    # the SMILES code given. The TP is stored in self$TPs under the
    # syntactically valid name derived from its identifier.
    add_TP = function(x, smiles = NULL) {
      if (inherits(x, "chent")) {
        id <- names(x$identifier)
        tp <- x
      } else {
        id <- make.names(x)
        tp <- chent$new(x, smiles)
      }
      self$TPs[[id]] <- tp
    },

    # The columns match the rows appended by add_transformation()
    transformations = data.frame(study_type = character(0),
                                 TP_identifier = character(0),
                                 max_occurrence = numeric(0),
                                 comment = character(0),
                                 source = character(0),
                                 pages = character(0),
                                 stringsAsFactors = FALSE),

    # Append a row to self$transformations. The TP has to be added with
    # add_TP() beforehand, otherwise the method stops. Numeric page numbers
    # are collapsed into one comma separated string.
    add_transformation = function(study_type, TP_identifier, max_occurrence,
                                  comment = "", source = NA, pages = NA) {
      TP_name <- make.names(TP_identifier)
      if (!inherits(self$TPs[[TP_name]], "chent")) {
        stop(paste("Please add the TP", TP_identifier, "first using chent$add_TP()"))
      }
      if (is.numeric(pages)) pages <- paste(pages, collapse = ", ")
      self$transformations <- rbind(self$transformations,
                                    data.frame(study_type = study_type,
                                               TP_identifier = TP_identifier,
                                               max_occurrence = max_occurrence,
                                               comment = comment,
                                               source = source,
                                               pages = pages,
                                               stringsAsFactors = FALSE))
    },

    soil_degradation_endpoints = data.frame(destination = character(0),
                                            DT50 = numeric(0),
                                            comment = character(0),
                                            pages = character(0),
                                            stringsAsFactors = FALSE),

    # Append a row to self$soil_degradation_endpoints. Several pages are
    # collapsed into one comma separated string.
    add_soil_degradation_endpoints = function(destination, DT50 = NA,
                                              comment = "", pages = NA) {
      if (length(pages) > 1) pages <- paste(pages, collapse = ", ")
      i <- nrow(self$soil_degradation_endpoints) + 1
      # The text columns and the numeric column are written separately, as
      # combining them with c() would turn DT50 into a character value
      self$soil_degradation_endpoints[i, c("destination", "comment", "pages")] <-
        c(destination, comment, pages)
      self$soil_degradation_endpoints[i, "DT50"] <- DT50
    },

    ff = data.frame(from = character(0), to = character(0), ff = numeric(0),
                    comment = character(0), pages = character(0),
                    stringsAsFactors = FALSE),

    # Append a row to self$ff. 'to' and, unless it is "parent", 'from' have
    # to be names of entries in self$TPs, otherwise the method stops. Several
    # pages are collapsed into one comma separated string.
    add_ff = function(from = "parent", to, ff = 1, comment = "", pages = NA) {
      if (length(pages) > 1) pages <- paste(pages, collapse = ", ")
      tp_names <- names(self$TPs)
      if (from != "parent" && !(from %in% tp_names)) {
        stop(from, " was not found in TPs")
      }
      if (!(to %in% tp_names)) stop(to, " was not found in TPs")
      i <- nrow(self$ff) + 1
      # Write the numeric ff value separately so that the ff column is not
      # coerced to character
      self$ff[i, c("from", "to", "comment", "pages")] <-
        c(from, to, comment, pages)
      self$ff[i, "ff"] <- ff
    }
  )
)
#' Printing method for chent objects
#'
#' Prints the identifier, the InChI Key, the SMILES codes and, if available,
#' the molecular weight and up to ten PubChem synonyms.
#'
#' @param x The chent object to be printed
#' @param ... Further arguments for compatibility with the S3 method
#' @return The object \code{x}, invisibly
#' @importFrom utils head
#' @export
print.chent <- function(x, ...) {
  cat("<chent>\n")
  cat("Identifier $identifier", x$identifier, "\n")
  cat("InChI Key $inchikey", x$inchikey, "\n")
  cat("SMILES string $smiles:\n")
  print(x$smiles)
  if (!is.null(x$mw)) {
    cat("Molecular weight $mw:", round(x$mw, 1), "\n")
  }
  if (!is.null(x$pubchem$synonyms)) {
    cat("PubChem synonyms (up to 10):\n")
    print(head(x$pubchem$synonyms, n = 10L))
  }
  # Print methods return their argument invisibly
  invisible(x)
}
#' Draw SVG graph from a chent object using RDKit
#'
#' The first SMILES code of the object is passed to RDKit via PythonInR and
#' the structure is written to a file. If RDKit cannot be imported into the
#' Python session, only a message is given.
#'
#' @param x The chent object to be plotted
#' @param width The desired width in pixels
#' @param height The desired height in pixels
#' @param filename The filename
#' @param subdir The path to which the file should be written
#' @export
draw_svg.chent <- function(x, width = 300, height = 150,
                           filename = paste0(names(x$identifier), ".svg"),
                           subdir = "svg") {
  if (length(x$smiles) == 0) {
    stop("RDKit needs a SMILES code")
  }
  if (!requireNamespace("PythonInR")) {
    stop("PythonInR can not be loaded")
  }
  if (!PythonInR::pyIsConnected()) {
    PythonInR::pyConnect()
  }
  try_rdkit <- try(PythonInR::pyImport("Chem", from = "rdkit"))
  if (inherits(try_rdkit, "try-error")) {
    message("Could not import RDKit in Python session")
  } else {
    # Build a single quoted Python string literal. Backslashes occur in
    # stereo SMILES and in Windows paths and would otherwise be read as
    # Python escape sequences.
    py_quote <- function(s) {
      s <- gsub("\\", "\\\\", s, fixed = TRUE)
      s <- gsub("'", "\\'", s, fixed = TRUE)
      paste0("'", s, "'")
    }
    if (!dir.exists(subdir)) dir.create(subdir, recursive = TRUE)
    # x$smiles can hold several codes (user, PubChem); use the first one so
    # that exactly one Python statement is generated
    PythonInR::pyExec(paste0("mol = Chem.MolFromSmiles(",
                             py_quote(x$smiles[1]), ")"))
    PythonInR::pyImport("Draw", from = "rdkit.Chem")
    cmd <- paste0("Draw.MolToFile(mol, ", py_quote(file.path(subdir, filename)),
                  ", size = (", width, ", ", height, "))")
    PythonInR::pyExec(cmd)
  }
}
#' Plot method for chent objects
#'
#' Draws the Picture stored in the object. Stops if the object does not
#' contain a Picture.
#'
#' @importFrom grImport grid.picture
#' @param x The chent object to be plotted
#' @param ... Further arguments passed to \code{\link{grid.picture}}
#' @example inst/examples/caffeine.R
#' @export
plot.chent <- function(x, ...) {
  if (is.null(x$Picture)) {
    stop("No Picture available for plotting")
  }
  # Forward the additional arguments as documented
  grid.picture(x$Picture, ...)
}
#' An R6 class for pesticidal active ingredients and associated data
#'
#' The class is initialised with an identifier which is generally an ISO common name.
#' Additional chemical information is retrieved from the internet.
#'
#' @docType class
#' @importFrom R6 R6Class
#' @export
#' @format An \code{\link{R6Class}} generator object
#' @field iso ISO common name according to ISO 1750 as retrieved from www.alanwood.net/pesticides
#' @field alanwood List of information retrieved from www.alanwood.net/pesticides
#' @example inst/examples/pai.R
#' @keywords data
pai <- R6Class("pai",
  inherit = chent,
  public = list(
    iso = NULL,
    alanwood = NULL,

    # Query www.alanwood.net if an ISO common name was given and alanwood is
    # TRUE, then run the initialisation of the parent class.
    #   iso         ISO common name
    #   identifier  Identifier used for the queries, defaults to iso
    #   smiles, pubchem, rdkit, chyaml  Passed on to the parent class
    # Returns the object invisibly.
    initialize = function(iso, identifier = iso, smiles = NULL, alanwood = TRUE,
                          pubchem = TRUE, rdkit = TRUE, chyaml = TRUE) {
      # Scalar condition, so use the short-circuiting operator
      if (!missing(iso) && alanwood) {
        message("alanwood.net:")
        # NOTE(review): the common name query uses 'identifier', which only
        # equals 'iso' if no separate identifier was given - confirm intent
        self$alanwood <- webchem::aw_query(identifier, type = "commonname")[[1]]
        if (is.na(self$alanwood[1])) {
          message("Common name ", identifier, " is not known at www.alanwood.net, trying PubChem")
        } else {
          self$iso <- self$alanwood$cname
          attr(self$iso, "source") <- "alanwood"
          attr(self$iso, "status") <- self$alanwood$status
          # Set before the parent initialisation, which only reports a
          # mismatch if PubChem gives a different InChIKey
          self$inchikey <- self$alanwood$inchikey
          attr(self$inchikey, "source") <- "alanwood"
        }
      }
      super$initialize(identifier = identifier, smiles = smiles,
                       pubchem = pubchem, rdkit = rdkit, chyaml = chyaml)
      invisible(self)
    }
  )
)
#' Printing method for pai objects (pesticidal active ingredients)
#'
#' Prints the ISO common name, the output of the printing method for chent
#' objects and, if present, the transformation products and transformations.
#'
#' @param x The chent object to be printed
#' @param ... Further arguments for compatibility with the S3 method
#' @return The object \code{x}, invisibly
#' @export
print.pai <- function(x, ...) {
  cat("<pai> with ISO common name $iso", x$iso, "\n")
  print.chent(x)
  if (length(x$TPs) > 0) {
    cat("\nTransformation products:\n")
    print(x$TPs)
  }
  if (nrow(x$transformations) > 0) {
    cat("\nTransformations:\n")
    print(x$transformations)
  }
  # Print methods return their argument invisibly
  invisible(x)
}
#' R6 class for holding a product with at least one active ingredient
#'
#' An R6 class for holding information about a product with at least one active ingredient
#'
#' @docType class
#' @importFrom R6 R6Class
#' @export
#' @format An \code{\link{R6Class}} generator object.
#' @field name The name of the product
#' @field ais A list of active ingredients
#' @field concentrations The concentration of the ais
#' @field concentration_units Defaults to g/L
#' @field density The density of the product
#' @field density_units Defaults to g/L
#' @keywords data
pp <- R6Class("pp",
  public = list(
    name = NULL,
    ais = list(),
    concentrations = NULL,
    concentration_units = NULL,
    density = NULL,
    density_units = "g/L",

    # Store the product name, the active ingredients passed via '...', their
    # concentrations and the density. The concentrations are named after the
    # active ingredients. Returns the object invisibly.
    initialize = function(name, ..., concentrations, concentration_units = "g/L",
                          density = 1000, density_units = "g/L") {
      self$name <- name
      self$ais <- list(...)
      self$concentrations <- concentrations
      self$density <- density
      self$density_units <- density_units
      names(self$concentrations) <- names(self$ais)
      self$concentration_units <- concentration_units
      invisible(self)
    },

    # Short printing method. Accepts and ignores further arguments, so that
    # calls like print(x, digits = 3) do not fail, and returns the object
    # invisibly.
    print = function(...) {
      cat("<pp> named", self$name, "\n")
      invisible(self)
    }
  )
)
# vim: set ts=2 sw=2 expandtab: