diff options
-rw-r--r-- | DESCRIPTION | 4 | ||||
-rw-r--r-- | R/chent.R | 103 | ||||
-rw-r--r-- | man/pai.Rd | 2 | ||||
-rw-r--r-- | test.log | 11 | ||||
-rw-r--r-- | tests/testthat/test_pai.R | 22 |
5 files changed, 93 insertions, 49 deletions
diff --git a/DESCRIPTION b/DESCRIPTION index bc02e64..c50c9b4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: chents Type: Package Title: Chemical Entities as R Objects -Version: 0.2-4 -Date: 2016-10-13 +Version: 0.2-5 +Date: 2016-10-14 Authors@R: c(person("Johannes", "Ranke", role = c("aut", "cre", "cph"), email = "jranke@uni-bremen.de")) Description: Utilities for dealing with chemical entities and associated @@ -53,23 +53,31 @@ chent <- R6Class("chent", Picture = NULL, chyaml = NULL, degradation = NULL, - initialize = function(identifier, smiles = NULL, - rdkit = TRUE, pubchem = TRUE, + initialize = function(identifier, smiles = NULL, smiles_source = 'user', + inchikey = NULL, inchikey_source = 'user', + pubchem = TRUE, pubchem_from = c('name', 'smiles', 'inchikey'), + rdkit = TRUE, chyaml = TRUE) { self$identifier <- identifier names(self$identifier) <- make.names(identifier) + pubchem_from = match.arg(pubchem_from) self$smiles <- c(user = smiles) if (pubchem) { - self$try_pubchem(identifier) + if (pubchem_from == 'name') { + query = identifier + } else { + query = get(pubchem_from) + } + self$try_pubchem(query, from = pubchem_from) } if (rdkit) { if(requireNamespace("PythonInR", quietly = TRUE)) { if (is.null(self$smiles)) { - stop("RDKit needs a SMILES code") + message("RDKit would need a SMILES code") } else { message("Trying to get chemical information from RDKit using ", names(self$smiles)[1], " SMILES\n", @@ -86,16 +94,19 @@ chent <- R6Class("chent", } invisible(self) }, - try_pubchem = function(identifier) { + try_pubchem = function(query, from = 'name') { message("PubChem:") - if (missing(identifier)) identifier <- self$identifier - pubchem_cids = webchem::get_cid(identifier)[[identifier]] + if (missing(query)) query <- self$identifier + pubchem_result = webchem::get_cid(query, from = from) - if (is.na(pubchem_cids[1])) { - message("Query ", identifier, " did not give results at PubChem") + if (is.na(pubchem_result[[1]][1])) { + message("Query ", query, " did not give results at PubChem") } else { - message("Found ", length(pubchem_cids), " entries in PubChem, using the first one.") - self$get_pubchem(pubchem_cids[1]) + n_results = length(pubchem_result[[1]]) + if (n_results > 1) { + warning("Found ", n_results, " entries in PubChem, using the first one.") + } + self$get_pubchem(pubchem_result[[1]][1]) } }, get_pubchem = function(pubchem_cid) { @@ -115,10 +126,20 @@ chent <- R6Class("chent", self$inchikey <- self$pubchem$InChIKey attr(self$inchikey, "source") <- "pubchem" } else { - if (self$pubchem$InChIKey != self$inchikey) { - message("InChiKey ", self$pubchem$InChIKey, " from PubChem record does not match\n", - "InChiKey ", self$inchikey, " retreived from ", - attr(self$inchikey, "source")) + if (length(self$inchikey) > 1) { + message("InChIKey ", self$inchikey, " retreived from ", + attr(self$inchikey, "source"), + " has length > 1, using PubChem InChIKey") + self$inchikey <- self$pubchem$InChIKey + attr(self$inchikey, "source") <- "pubchem" + } else { + if (self$pubchem$InChIKey != self$inchikey) { + message("InChiKey ", self$pubchem$InChIKey, " from PubChem record does not match\n", + "InChiKey ", self$inchikey, " retreived from ", + attr(self$inchikey, "source")) + } else { + attr(self$inchikey, "source") <- c(attr(self$inchikey, "source"), "pubchem") + } } } }, @@ -319,7 +340,7 @@ plot.chent = function(x, ...) { #' An R6 class for pesticidal active ingredients and associated data #' #' The class is initialised with an identifier which is generally an ISO common name. -#' Additional chemical information is retrieved from the internet. +#' Additional chemical information is retrieved from the internet if available. #' #' @docType class #' @importFrom R6 R6Class @@ -335,26 +356,60 @@ pai <- R6Class("pai", public <- list( iso = NULL, alanwood = NULL, - initialize = function(iso, identifier = iso, smiles = NULL, alanwood = TRUE, - pubchem = TRUE, rdkit = TRUE, chyaml = TRUE) { + initialize = function(iso, identifier = iso, + smiles = NULL, smiles_source = 'user', + inchikey = NULL, inchikey_source = 'user', + alanwood = TRUE, + pubchem = TRUE, pubchem_from = 'auto', + rdkit = TRUE, chyaml = TRUE) + { + if (!is.null(inchikey)) { + self$inchikey = inchikey + attr(self$inchikey, "source") <- "user" + } if (!missing(iso) & alanwood) { message("alanwood.net:") - self$alanwood = webchem::aw_query(identifier, type = "commonname")[[1]] - if (is.na(self$alanwood[1])) { + aw_result = webchem::aw_query(identifier, type = "commonname") + + # Use first element of list, as we passed a query of length one + if (is.na(aw_result[[1]][1])) { message("Common name ", identifier, " is not known at www.alanwood.net, trying PubChem") } else { + self$alanwood = aw_result[[1]] self$iso = self$alanwood$cname attr(self$iso, "source") <- "alanwood" attr(self$iso, "status") <- self$alanwood$status - self$inchikey = self$alanwood$inchikey - attr(self$inchikey, "source") <- "alanwood" + aw_ik = self$alanwood$inchikey + if (length(aw_ik) == 1 && nchar(aw_ik) == 27 && !is.na(aw_ik)) { + if (is.null(self$inchikey)) { + self$inchikey = self$alanwood$inchikey + attr(self$inchikey, "source") <- "alanwood" + } else { + if (aw_ik == self$inchikey) { + attr(self$inchikey, "source") = c(attr(self$inchikey, "source"), "alanwood") + } else { + warning("InChIKey ", self$inchikey, " differs from ", aw_ik, " obtained from alanwood.net") + } + } + } + } + } + + # Set pubchem_from if not specified + if (pubchem_from == 'auto') { + pubchem_from = 'name' + if (!is.null(self$inchikey)) { + pubchem_from = 'inchikey' } } - super$initialize(identifier = identifier, smiles = smiles, - pubchem = pubchem, rdkit = rdkit, chyaml = chyaml) + super$initialize(identifier = identifier, + smiles = smiles, smiles_source = smiles_source, + inchikey = self$inchikey, + pubchem = pubchem, pubchem_from = pubchem_from, + rdkit = rdkit, chyaml = chyaml) invisible(self) } @@ -10,7 +10,7 @@ pai } \description{ The class is initialised with an identifier which is generally an ISO common name. -Additional chemical information is retrieved from the internet. +Additional chemical information is retrieved from the internet if available. } \section{Fields}{ @@ -8,15 +8,6 @@ Initialize Python Version 2.7.9 (default, Jun 29 2016, 13:11:10) Trying to get chemical information from RDKit using user SMILES CCCCCCCCO ...... -Generation of pai objects: alanwood.net: -Querying glyphosate.html -PubChem: -http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/cids/JSON -Found 1 entries in PubChem, using the first one. -http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/property/MolecularFormula,MolecularWeight,CanonicalSMILES,IsomericSMILES,InChI,InChIKey,IUPACName,XLogP,ExactMass,MonoisotopicMass,TPSA,Complexity,Charge,HBondDonorCount,HBondAcceptorCount,RotatableBondCount,HeavyAtomCount,IsotopeAtomCount,AtomStereoCount,DefinedAtomStereoCount,UndefinedAtomStereoCount,BondStereoCount,DefinedBondStereoCount,UndefinedBondStereoCount,CovalentUnitCount,Volume3D,XStericQuadrupole3D,YStericQuadrupole3D,ZStericQuadrupole3D,FeatureCount3D,FeatureAcceptorCount3D,FeatureDonorCount3D,FeatureAnionCount3D,FeatureCationCount3D,FeatureRingCount3D,FeatureHydrophobeCount3D,ConformerModelRMSD3D,EffectiveRotorCount3D,ConformerCount3D,Fingerprint2D/JSON -http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON -Trying to get chemical information from RDKit using PubChem_Canonical SMILES -C(C(=O)O)NCP(=O)(O)O -....... +Generation of pai objects: ......... DONE =========================================================================== diff --git a/tests/testthat/test_pai.R b/tests/testthat/test_pai.R index 1718a9a..a0b7704 100644 --- a/tests/testthat/test_pai.R +++ b/tests/testthat/test_pai.R @@ -1,23 +1,21 @@ +# For manual use of this file +require(chents) +require(testthat) + context("Generation of pai objects") -glyphosate <- pai$new("glyphosate", chyaml = FALSE) - -test_that("a pai object is generated from its ISO common name", { +test_that("a pai object is correctly generated from an ambiguous name, with warning", { + expect_warning(glyphosate <- pai$new("glyphosate", chyaml = FALSE)) + expect_warning(pai$new("benzalkonium chloride", chyaml = FALSE)) + expect_equivalent(glyphosate$alanwood$cas, "1071-83-6") expect_equivalent(glyphosate$alanwood$formula, "C3H8NO5P") expect_equivalent(glyphosate$alanwood$iupac_name, "N-(phosphonomethyl)glycine") expect_equal(names(glyphosate$identifier), "glyphosate") ik = "XDDAORKBJWWYJS-UHFFFAOYSA-N" - attr(ik, "source") <- "alanwood" + attr(ik, "source") <- c("alanwood", "pubchem") expect_equal(glyphosate$inchikey, ik) -}) - -test_that("a pai object is generated from an ambiguous name", { - deltamethrin <- pai$new("deltamethrin", chyaml = FALSE) -}) - -test_that("PubChem information was added via webchem", { expect_equivalent(round(glyphosate$mw, 2), 169.07) - smiles <- "C(C(=O)O)NCP(=O)(O)O" + smiles <- "C(C(=O)O)[NH2+]CP(=O)(O)[O-]" expect_equal(glyphosate$smiles[["PubChem_Canonical"]], smiles) }) |