From e85f61000c139492ec6497aa3c6206a66b54d783 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Fri, 14 Oct 2016 15:24:43 +0200 Subject: Better handling of ambiguous names and "source"s --- ChangeLog | 12 ++++++ DESCRIPTION | 4 +- R/chent.R | 103 +++++++++++++++++++++++++++++++++++----------- man/pai.Rd | 2 +- test.log | 11 +---- tests/testthat/test_pai.R | 22 +++++----- 6 files changed, 105 insertions(+), 49 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2d1b1f9..735092e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +commit 6cfe5fbd827ca37f24134e8356e8144520ee1265 +Author: Johannes Ranke +Date: 2016-10-13 17:28:14 +0200 + + Remove unmaintained usage + +commit 5a9a777987fd7ac0d5724e4cfdb2178fa1567281 +Author: Johannes Ranke +Date: 2016-10-13 17:23:12 +0200 + + Commit changelog as test for mirroring on github + commit 291337e920cc95510fce3c0cdcc62b4443cd3bc4 Author: Johannes Ranke Date: 2016-10-13 14:03:19 +0200 diff --git a/DESCRIPTION b/DESCRIPTION index bc02e64..c50c9b4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: chents Type: Package Title: Chemical Entities as R Objects -Version: 0.2-4 -Date: 2016-10-13 +Version: 0.2-5 +Date: 2016-10-14 Authors@R: c(person("Johannes", "Ranke", role = c("aut", "cre", "cph"), email = "jranke@uni-bremen.de")) Description: Utilities for dealing with chemical entities and associated diff --git a/R/chent.R b/R/chent.R index fe87d5e..6606992 100644 --- a/R/chent.R +++ b/R/chent.R @@ -53,23 +53,31 @@ chent <- R6Class("chent", Picture = NULL, chyaml = NULL, degradation = NULL, - initialize = function(identifier, smiles = NULL, - rdkit = TRUE, pubchem = TRUE, + initialize = function(identifier, smiles = NULL, smiles_source = 'user', + inchikey = NULL, inchikey_source = 'user', + pubchem = TRUE, pubchem_from = c('name', 'smiles', 'inchikey'), + rdkit = TRUE, chyaml = TRUE) { self$identifier <- identifier names(self$identifier) <- make.names(identifier) + pubchem_from = match.arg(pubchem_from) self$smiles <- c(user = smiles) if (pubchem) { - self$try_pubchem(identifier) + if (pubchem_from == 'name') { + query = identifier + } else { + query = get(pubchem_from) + } + self$try_pubchem(query, from = pubchem_from) } if (rdkit) { if(requireNamespace("PythonInR", quietly = TRUE)) { if (is.null(self$smiles)) { - stop("RDKit needs a SMILES code") + message("RDKit would need a SMILES code") } else { message("Trying to get chemical information from RDKit using ", names(self$smiles)[1], " SMILES\n", @@ -86,16 +94,19 @@ chent <- R6Class("chent", } invisible(self) }, - try_pubchem = function(identifier) { + try_pubchem = function(query, from = 'name') { message("PubChem:") - if (missing(identifier)) identifier <- self$identifier - pubchem_cids = webchem::get_cid(identifier)[[identifier]] + if (missing(query)) query <- self$identifier + pubchem_result = webchem::get_cid(query, from = from) - if (is.na(pubchem_cids[1])) { - message("Query ", identifier, " did not give results at PubChem") + if (is.na(pubchem_result[[1]][1])) { + message("Query ", query, " did not give results at PubChem") } else { - message("Found ", length(pubchem_cids), " entries in PubChem, using the first one.") - self$get_pubchem(pubchem_cids[1]) + n_results = length(pubchem_result[[1]]) + if (n_results > 1) { + warning("Found ", n_results, " entries in PubChem, using the first one.") + } + self$get_pubchem(pubchem_result[[1]][1]) } }, get_pubchem = function(pubchem_cid) { @@ -115,10 +126,20 @@ chent <- R6Class("chent", self$inchikey <- self$pubchem$InChIKey attr(self$inchikey, "source") <- "pubchem" } else { - if (self$pubchem$InChIKey != self$inchikey) { - message("InChiKey ", self$pubchem$InChIKey, " from PubChem record does not match\n", - "InChiKey ", self$inchikey, " retreived from ", - attr(self$inchikey, "source")) + if (length(self$inchikey) > 1) { + message("InChIKey ", self$inchikey, " retreived from ", + attr(self$inchikey, "source"), + " has length > 1, using PubChem InChIKey") + self$inchikey <- self$pubchem$InChIKey + attr(self$inchikey, "source") <- "pubchem" + } else { + if (self$pubchem$InChIKey != self$inchikey) { + message("InChiKey ", self$pubchem$InChIKey, " from PubChem record does not match\n", + "InChiKey ", self$inchikey, " retreived from ", + attr(self$inchikey, "source")) + } else { + attr(self$inchikey, "source") <- c(attr(self$inchikey, "source"), "pubchem") + } } } }, @@ -319,7 +340,7 @@ plot.chent = function(x, ...) { #' An R6 class for pesticidal active ingredients and associated data #' #' The class is initialised with an identifier which is generally an ISO common name. -#' Additional chemical information is retrieved from the internet. +#' Additional chemical information is retrieved from the internet if available. #' #' @docType class #' @importFrom R6 R6Class @@ -335,26 +356,60 @@ pai <- R6Class("pai", public <- list( iso = NULL, alanwood = NULL, - initialize = function(iso, identifier = iso, smiles = NULL, alanwood = TRUE, - pubchem = TRUE, rdkit = TRUE, chyaml = TRUE) { + initialize = function(iso, identifier = iso, + smiles = NULL, smiles_source = 'user', + inchikey = NULL, inchikey_source = 'user', + alanwood = TRUE, + pubchem = TRUE, pubchem_from = 'auto', + rdkit = TRUE, chyaml = TRUE) + { + if (!is.null(inchikey)) { + self$inchikey = inchikey + attr(self$inchikey, "source") <- "user" + } if (!missing(iso) & alanwood) { message("alanwood.net:") - self$alanwood = webchem::aw_query(identifier, type = "commonname")[[1]] - if (is.na(self$alanwood[1])) { + aw_result = webchem::aw_query(identifier, type = "commonname") + + # Use first element of list, as we passed a query of length one + if (is.na(aw_result[[1]][1])) { message("Common name ", identifier, " is not known at www.alanwood.net, trying PubChem") } else { + self$alanwood = aw_result[[1]] self$iso = self$alanwood$cname attr(self$iso, "source") <- "alanwood" attr(self$iso, "status") <- self$alanwood$status - self$inchikey = self$alanwood$inchikey - attr(self$inchikey, "source") <- "alanwood" + aw_ik = self$alanwood$inchikey + if (length(aw_ik) == 1 && nchar(aw_ik) == 27 && !is.na(aw_ik)) { + if (is.null(self$inchikey)) { + self$inchikey = self$alanwood$inchikey + attr(self$inchikey, "source") <- "alanwood" + } else { + if (aw_ik == self$inchikey) { + attr(self$inchikey, "source") = c(attr(self$inchikey, "source"), "alanwood") + } else { + warning("InChIKey ", self$inchikey, " differs from ", aw_ik, " obtained from alanwood.net") + } + } + } + } + } + + # Set pubchem_from if not specified + if (pubchem_from == 'auto') { + pubchem_from = 'name' + if (!is.null(self$inchikey)) { + pubchem_from = 'inchikey' } } - super$initialize(identifier = identifier, smiles = smiles, - pubchem = pubchem, rdkit = rdkit, chyaml = chyaml) + super$initialize(identifier = identifier, + smiles = smiles, smiles_source = smiles_source, + inchikey = self$inchikey, + pubchem = pubchem, pubchem_from = pubchem_from, + rdkit = rdkit, chyaml = chyaml) invisible(self) } diff --git a/man/pai.Rd b/man/pai.Rd index 9c77e1e..c9d9a02 100644 --- a/man/pai.Rd +++ b/man/pai.Rd @@ -10,7 +10,7 @@ pai } \description{ The class is initialised with an identifier which is generally an ISO common name. -Additional chemical information is retrieved from the internet. +Additional chemical information is retrieved from the internet if available. } \section{Fields}{ diff --git a/test.log b/test.log index 158cb5a..a4eece6 100644 --- a/test.log +++ b/test.log @@ -8,15 +8,6 @@ Initialize Python Version 2.7.9 (default, Jun 29 2016, 13:11:10) Trying to get chemical information from RDKit using user SMILES CCCCCCCCO ...... -Generation of pai objects: alanwood.net: -Querying glyphosate.html -PubChem: -http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/cids/JSON -Found 1 entries in PubChem, using the first one. -http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/property/MolecularFormula,MolecularWeight,CanonicalSMILES,IsomericSMILES,InChI,InChIKey,IUPACName,XLogP,ExactMass,MonoisotopicMass,TPSA,Complexity,Charge,HBondDonorCount,HBondAcceptorCount,RotatableBondCount,HeavyAtomCount,IsotopeAtomCount,AtomStereoCount,DefinedAtomStereoCount,UndefinedAtomStereoCount,BondStereoCount,DefinedBondStereoCount,UndefinedBondStereoCount,CovalentUnitCount,Volume3D,XStericQuadrupole3D,YStericQuadrupole3D,ZStericQuadrupole3D,FeatureCount3D,FeatureAcceptorCount3D,FeatureDonorCount3D,FeatureAnionCount3D,FeatureCationCount3D,FeatureRingCount3D,FeatureHydrophobeCount3D,ConformerModelRMSD3D,EffectiveRotorCount3D,ConformerCount3D,Fingerprint2D/JSON -http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON -Trying to get chemical information from RDKit using PubChem_Canonical SMILES -C(C(=O)O)NCP(=O)(O)O -....... +Generation of pai objects: ......... DONE =========================================================================== diff --git a/tests/testthat/test_pai.R b/tests/testthat/test_pai.R index 1718a9a..a0b7704 100644 --- a/tests/testthat/test_pai.R +++ b/tests/testthat/test_pai.R @@ -1,23 +1,21 @@ +# For manual use of this file +require(chents) +require(testthat) + context("Generation of pai objects") -glyphosate <- pai$new("glyphosate", chyaml = FALSE) - -test_that("a pai object is generated from its ISO common name", { +test_that("a pai object is correctly generated from an ambiguous name, with warning", { + expect_warning(glyphosate <- pai$new("glyphosate", chyaml = FALSE)) + expect_warning(pai$new("benzalkonium chloride", chyaml = FALSE)) + expect_equivalent(glyphosate$alanwood$cas, "1071-83-6") expect_equivalent(glyphosate$alanwood$formula, "C3H8NO5P") expect_equivalent(glyphosate$alanwood$iupac_name, "N-(phosphonomethyl)glycine") expect_equal(names(glyphosate$identifier), "glyphosate") ik = "XDDAORKBJWWYJS-UHFFFAOYSA-N" - attr(ik, "source") <- "alanwood" + attr(ik, "source") <- c("alanwood", "pubchem") expect_equal(glyphosate$inchikey, ik) -}) - -test_that("a pai object is generated from an ambiguous name", { - deltamethrin <- pai$new("deltamethrin", chyaml = FALSE) -}) - -test_that("PubChem information was added via webchem", { expect_equivalent(round(glyphosate$mw, 2), 169.07) - smiles <- "C(C(=O)O)NCP(=O)(O)O" + smiles <- "C(C(=O)O)[NH2+]CP(=O)(O)[O-]" expect_equal(glyphosate$smiles[["PubChem_Canonical"]], smiles) }) -- cgit v1.2.1