From 26748e386a386f8459edb4f061012f099d34de7d Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Fri, 16 Oct 2015 15:35:33 +0200 Subject: Some improvements, especially in loading chyaml --- ChangeLog | 6 ++ DESCRIPTION | 4 +- R/chent.R | 140 ++++++++++++++++++++++++++------------------ inst/examples/caffeine.R | 3 +- inst/examples/octanol.R | 1 - man/chent.Rd | 8 ++- man/plot.chent.Rd | 3 +- tests/testthat/test_chent.R | 9 +-- tests/testthat/test_pai.R | 6 +- 9 files changed, 103 insertions(+), 77 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0983a53..17db91f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +commit a892d1dc3c2a3724063f4d671f7f261ff5392884 +Author: Johannes Ranke +Date: 2015-10-02 13:01:38 +0200 + + Some maintenance work + commit f908377f1de2e04ca3720d10084169c46a477ce2 Author: Johannes Ranke Date: 2015-09-28 21:34:23 +0200 diff --git a/DESCRIPTION b/DESCRIPTION index 103a6ef..05d2e91 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: chents Type: Package Title: Chemical Entities as R Objects -Version: 0.2-1 -Date: 2015-10-02 +Version: 0.2-2 +Date: 2015-10-15 Authors@R: c(person("Johannes", "Ranke", role = c("aut", "cre", "cph"), email = "jranke@uni-bremen.de")) Description: Utilities for dealing with chemical entities and associated data as R objects. diff --git a/R/chent.R b/R/chent.R index 3f344b9..12ef15d 100644 --- a/R/chent.R +++ b/R/chent.R @@ -36,6 +36,8 @@ #' @field pubchem List of information retreived from PubChem #' @field rdkit List of information obtained with RDKit #' @field Picture Graph as a \code{\link{picture}} object obtained using grImport +#' @field chyaml List of information obtained from a YAML file +#' @field degradation List of degradation endpoints #' @example inst/examples/octanol.R #' @example inst/examples/caffeine.R #' @keywords data @@ -50,29 +52,40 @@ chent <- R6Class("chent", rdkit = NULL, Picture = NULL, chyaml = NULL, - initialize = function(identifier, smiles = NULL, - source = c("rdkit", "pubchem")) { + degradation = NULL, + initialize = function(identifier, smiles = NULL, + rdkit = TRUE, pubchem = TRUE, + chyaml = TRUE) { + self$identifier <- identifier names(self$identifier) <- make.names(identifier) - source = match.arg(source) - switch(source, - pubchem = { - self$try_pubchem(identifier) - }, - rdkit = { - if (is.null(smiles)) { - stop("rdkit needs smiles as input") - } else { - self$smiles <- smiles - self$get_rdkit() - self$mw <- self$rdkit$mw - attr(self$mw, "source") <- "rdkit" - } + + self$smiles <- c(user = smiles) + + if (pubchem) { + self$try_pubchem(identifier) + } + + if (rdkit) { + if (is.null(self$smiles)) { + stop("RDKit needs a SMILES code") + } else { + message("Trying to get chemical information from RDKit using ", + names(self$smiles)[1], " SMILES\n", + self$smiles[1]) + self$get_rdkit() + self$mw <- self$rdkit$mw + attr(self$mw, "source") <- "rdkit" } - ) + } + + if (chyaml) { + self$get_chyaml() + } invisible(self) }, try_pubchem = function(identifier) { + message("PubChem:") if (missing(identifier)) identifier <- self$identifier pubchem_cids = webchem::get_cid(identifier) @@ -86,9 +99,10 @@ chent <- R6Class("chent", get_pubchem = function(pubchem_cid) { self$pubchem = webchem::cid_compinfo(pubchem_cid) - self$smiles = self$pubchem$CanonicalSmiles - attr(self$smiles, "source") <- "pubchem" - attr(self$smiles, "type") <- "canonical" + self$smiles["PubChem_Canonical"] <- self$pubchem$CanonicalSmiles + if (self$pubchem$IsomericSmiles != self$pubchem$CanonicalSmiles) { + self$smiles["PubChem_Isomeric"] <- self$pubchem$IsomericSmiles + } self$mw = as.numeric(self$pubchem$MolecularWeight) attr(self$mw, "source") <- "pubchem" @@ -98,8 +112,9 @@ chent <- R6Class("chent", attr(self$inchikey, "source") <- "pubchem" } else { if (self$pubchem$InChIKey != self$inchikey) { - stop("InChiKey of PubChem record does not the one retreived from ", - attr(self$inchi, "source")) + message("InChiKey ", self$pubchem$InChIKey, " from PubChem record does not match\n", + "InChiKey ", self$inchikey, " retreived from ", + attr(self$inchikey, "source")) } } }, @@ -114,7 +129,7 @@ chent <- R6Class("chent", } else { self$rdkit <- list() pyImport("Descriptors", from = "rdkit.Chem") - pyExec(paste0("mol = Chem.MolFromSmiles('", self$smiles, "')")) + pyExec(paste0("mol = Chem.MolFromSmiles('", self$smiles[1], "')")) self$rdkit$mw <- pyExecg("mw = Descriptors.MolWt(mol)", "mw") if (!is.null(self$mw)) { if (round(self$rdkit$mw, 1) != round(self$mw, 1)) { @@ -134,11 +149,32 @@ chent <- R6Class("chent", self$Picture <- readPicture(xmlfile) } }, - get_chyaml = function(repo = c("local", "web")) { + get_chyaml = function(repo = c("wd", "local", "web"), + chyaml = paste0(URLencode(self$identifier), ".yaml")) { repo = match.arg(repo) - if (repo == "local") { - self$chyaml = yaml.load_file(file.path("~", "git/chyaml", - paste0(URLencode(self$identifier), ".yaml"))) + paths = c(wd = ".", + local = file.path("~", "git/chyaml")) + + chyaml_handlers = list( + expr = function(x) NULL, # To avoid security risks from reading chyaml files + dataframe = function(x) + eval(parse(text = paste0("data.frame(", x, ", stringsAsFactors = FALSE)")))) + + if (repo %in% c("wd", "local")) { + path = paths[repo] + full = file.path(path, chyaml) + if (!file.exists(full)) { + message("Did not find chyaml file ", full) + } else { + if (is(try(self$chyaml <- yaml.load_file(chyaml, handlers = chyaml_handlers)), + "try-error")) { + message("Could not load ", full) + } else { + message("Loaded ", full) + } + } + } else { + message("web repositories not implemented") } }, TPs = list(), @@ -200,7 +236,8 @@ print.chent = function(x, ...) { cat("\n") cat("Identifier $identifier", x$identifier, "\n") cat ("InChI Key $inchikey", x$inchikey, "\n") - cat ("SMILES string $smiles", x$smiles, "\n") + cat ("SMILES string $smiles:\n") + print(x$smiles) if (!is.null(x$mw)) cat ("Molecular weight $mw:", round(x$mw, 1), "\n") if (!is.null(x$pubchem)) { cat ("PubChem synonyms (first 10):\n") @@ -266,37 +303,26 @@ pai <- R6Class("pai", public <- list( iso = NULL, alanwood = NULL, - initialize = function(identifier, type = c("name", "smiles"), - source = c("alanwood", "pubchem")) { - self$identifier <- identifier - names(self$identifier) <- make.names(identifier) - type = match.arg(type) - attr(self$identifier, "type") <- type - source = match.arg(source) - switch(source, - alanwood = { - self$alanwood = webchem::alanwood(identifier, type = "commonname") - if (is.na(self$alanwood[1])) { - message("Common name ", identifier, " is not known at www.alanwood.net, trying PubChem") - self$try_pubchem(identifier) - } else { - self$iso = self$alanwood$cname - attr(self$iso, "source") <- "alanwood" - attr(self$iso, "status") <- self$alanwood$status - self$inchikey = self$alanwood$inchikey - attr(self$inchikey, "source") <- "alanwood" + initialize = function(iso, identifier = iso, smiles = NULL, alanwood = TRUE, + pubchem = TRUE, rdkit = TRUE, chyaml = TRUE) { - # Get additional information from PubChem - pubchem_cids = get_cid(identifier) - self$get_pubchem(pubchem_cids[[1]]) - self$get_rdkit() - } - }, - pubchem = { - self$try_pubchem(identifier) - self$get_rdkit() + + if (!missing(iso) & alanwood) { + message("alanwood.net:") + self$alanwood = webchem::alanwood(identifier, type = "commonname") + if (is.na(self$alanwood[1])) { + message("Common name ", identifier, " is not known at www.alanwood.net, trying PubChem") + } else { + self$iso = self$alanwood$cname + attr(self$iso, "source") <- "alanwood" + attr(self$iso, "status") <- self$alanwood$status + self$inchikey = self$alanwood$inchikey + attr(self$inchikey, "source") <- "alanwood" } - ) + } + super$initialize(identifier = identifier, smiles = smiles, + pubchem = pubchem, rdkit = rdkit, chyaml = chyaml) + invisible(self) } ) diff --git a/inst/examples/caffeine.R b/inst/examples/caffeine.R index ab794ac..29a7e0b 100644 --- a/inst/examples/caffeine.R +++ b/inst/examples/caffeine.R @@ -1,4 +1,3 @@ -caffeine <- chent$new("caffeine", source = "pubchem") +caffeine <- chent$new("caffeine") print(caffeine) -caffeine$get_rdkit() plot(caffeine) diff --git a/inst/examples/octanol.R b/inst/examples/octanol.R index 91e23cf..3e42c38 100644 --- a/inst/examples/octanol.R +++ b/inst/examples/octanol.R @@ -1,4 +1,3 @@ oct <- chent$new("1-octanol", smiles = "CCCCCCCCO") -oct$try_pubchem() print(oct) plot(oct) diff --git a/man/chent.Rd b/man/chent.Rd index da68b56..00516b9 100644 --- a/man/chent.Rd +++ b/man/chent.Rd @@ -29,15 +29,17 @@ python bindings are installed. \item{\code{rdkit}}{List of information obtained with RDKit} \item{\code{Picture}}{Graph as a \code{\link{picture}} object obtained using grImport} + +\item{\code{chyaml}}{List of information obtained from a YAML file} + +\item{\code{degradation}}{List of degradation endpoints} }} \examples{ oct <- chent$new("1-octanol", smiles = "CCCCCCCCO") -oct$try_pubchem() print(oct) plot(oct) -caffeine <- chent$new("caffeine", source = "pubchem") +caffeine <- chent$new("caffeine") print(caffeine) -caffeine$get_rdkit() plot(caffeine) } \keyword{data} diff --git a/man/plot.chent.Rd b/man/plot.chent.Rd index f2e7572..3fd2ac6 100644 --- a/man/plot.chent.Rd +++ b/man/plot.chent.Rd @@ -15,9 +15,8 @@ Plot method for chent objects } \examples{ -caffeine <- chent$new("caffeine", source = "pubchem") +caffeine <- chent$new("caffeine") print(caffeine) -caffeine$get_rdkit() plot(caffeine) } diff --git a/tests/testthat/test_chent.R b/tests/testthat/test_chent.R index da018c5..28d120e 100644 --- a/tests/testthat/test_chent.R +++ b/tests/testthat/test_chent.R @@ -1,11 +1,11 @@ context("Generation of chent objects") -oct <- chent$new("1-octanol", smiles = "CCCCCCCCO") +oct <- chent$new("1-octanol", smiles = "CCCCCCCCO", pubchem = FALSE, chyaml = FALSE) test_that("We can generate a chent object from SMILES using RDKit", { expect_equivalent(round(oct$mw, 2), 130.23) expect_equal(names(oct$identifier), "X1.octanol") - expect_equal(oct$smiles, "CCCCCCCCO") + expect_equal(oct$smiles[["user"]], "CCCCCCCCO") }) test_that("We can add information retrieved from PubChem via webchem", { @@ -14,8 +14,5 @@ test_that("We can add information retrieved from PubChem via webchem", { ik = "KBPLFHHGFOOTCA-UHFFFAOYSA-N" attr(ik, "source") <- "pubchem" expect_equal(oct$inchikey, ik) - smiles <- "CCCCCCCCO" - attr(smiles, "source") <- "pubchem" - attr(smiles, "type") <- "canonical" - expect_equal(oct$smiles, smiles) + expect_equal(oct$smiles[["PubChem_Canonical"]], "CCCCCCCCO") }) diff --git a/tests/testthat/test_pai.R b/tests/testthat/test_pai.R index dd0b235..944ad04 100644 --- a/tests/testthat/test_pai.R +++ b/tests/testthat/test_pai.R @@ -1,6 +1,6 @@ context("Generation of pai objects") -glyphosate <- pai$new("glyphosate") +glyphosate <- pai$new("glyphosate", chyaml = FALSE) test_that("We can generate a pai object from its ISO common name", { expect_equivalent(glyphosate$alanwood$cas, "1071-83-6") @@ -19,7 +19,5 @@ test_that("RDKit information was added", { test_that("PubChem information was added via webchem", { expect_equivalent(round(glyphosate$mw, 2), 169.07) smiles <- "C(C(=O)O)NCP(=O)(O)O" - attr(smiles, "source") <- "pubchem" - attr(smiles, "type") <- "canonical" - expect_equal(glyphosate$smiles, smiles) + expect_equal(glyphosate$smiles[["PubChem_Canonical"]], smiles) }) -- cgit v1.2.1