about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Johannes Ranke <jranke@uni-bremen.de> 2016-10-14 15:24:43 +0200
committer Johannes Ranke <jranke@uni-bremen.de> 2016-10-14 15:24:43 +0200
commit e85f61000c139492ec6497aa3c6206a66b54d783 (patch)
tree 7b008b9c1e7a212f1b05502af00c5b75f7c1f013
parent 6cfe5fbd827ca37f24134e8356e8144520ee1265 (diff)
Better handling of ambiguous names and "source"s
-rw-r--r-- ChangeLog 12
-rw-r--r-- DESCRIPTION 4
-rw-r--r-- R/chent.R 103
-rw-r--r-- man/pai.Rd 2
-rw-r--r-- test.log 11
-rw-r--r-- tests/testthat/test_pai.R 22
6 files changed, 105 insertions, 49 deletions
diff --git a/ChangeLog b/ChangeLog
index 2d1b1f9..735092e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+commit 6cfe5fbd827ca37f24134e8356e8144520ee1265
+Author: Johannes Ranke <jranke@uni-bremen.de>
+Date: 2016-10-13 17:28:14 +0200
+
+ Remove unmaintained usage
+
+commit 5a9a777987fd7ac0d5724e4cfdb2178fa1567281
+Author: Johannes Ranke <jranke@uni-bremen.de>
+Date: 2016-10-13 17:23:12 +0200
+
+ Commit changelog as test for mirroring on github
+
commit 291337e920cc95510fce3c0cdcc62b4443cd3bc4
Author: Johannes Ranke <jranke@uni-bremen.de>
Date: 2016-10-13 14:03:19 +0200
diff --git a/DESCRIPTION b/DESCRIPTION
index bc02e64..c50c9b4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
Package: chents
Type: Package
Title: Chemical Entities as R Objects
-Version: 0.2-4
-Date: 2016-10-13
+Version: 0.2-5
+Date: 2016-10-14
Authors@R: c(person("Johannes", "Ranke", role = c("aut", "cre", "cph"),
email = "jranke@uni-bremen.de"))
Description: Utilities for dealing with chemical entities and associated
diff --git a/R/chent.R b/R/chent.R
index fe87d5e..6606992 100644
--- a/R/chent.R
+++ b/R/chent.R
@@ -53,23 +53,31 @@ chent <- R6Class("chent",
Picture = NULL,
chyaml = NULL,
degradation = NULL,
- initialize = function(identifier, smiles = NULL,
- rdkit = TRUE, pubchem = TRUE,
+ initialize = function(identifier, smiles = NULL, smiles_source = 'user',
+ inchikey = NULL, inchikey_source = 'user',
+ pubchem = TRUE, pubchem_from = c('name', 'smiles', 'inchikey'),
+ rdkit = TRUE,
chyaml = TRUE) {
self$identifier <- identifier
names(self$identifier) <- make.names(identifier)
+ pubchem_from = match.arg(pubchem_from)
self$smiles <- c(user = smiles)
if (pubchem) {
- self$try_pubchem(identifier)
+ if (pubchem_from == 'name') {
+ query = identifier
+ } else {
+ query = get(pubchem_from)
+ }
+ self$try_pubchem(query, from = pubchem_from)
}
if (rdkit) {
if(requireNamespace("PythonInR", quietly = TRUE)) {
if (is.null(self$smiles)) {
- stop("RDKit needs a SMILES code")
+ message("RDKit would need a SMILES code")
} else {
message("Trying to get chemical information from RDKit using ",
names(self$smiles)[1], " SMILES\n",
@@ -86,16 +94,19 @@ chent <- R6Class("chent",
}
invisible(self)
},
- try_pubchem = function(identifier) {
+ try_pubchem = function(query, from = 'name') {
message("PubChem:")
- if (missing(identifier)) identifier <- self$identifier
- pubchem_cids = webchem::get_cid(identifier)[[identifier]]
+ if (missing(query)) query <- self$identifier
+ pubchem_result = webchem::get_cid(query, from = from)
- if (is.na(pubchem_cids[1])) {
- message("Query ", identifier, " did not give results at PubChem")
+ if (is.na(pubchem_result[[1]][1])) {
+ message("Query ", query, " did not give results at PubChem")
} else {
- message("Found ", length(pubchem_cids), " entries in PubChem, using the first one.")
- self$get_pubchem(pubchem_cids[1])
+ n_results = length(pubchem_result[[1]])
+ if (n_results > 1) {
+ warning("Found ", n_results, " entries in PubChem, using the first one.")
+ }
+ self$get_pubchem(pubchem_result[[1]][1])
}
},
get_pubchem = function(pubchem_cid) {
@@ -115,10 +126,20 @@ chent <- R6Class("chent",
self$inchikey <- self$pubchem$InChIKey
attr(self$inchikey, "source") <- "pubchem"
} else {
- if (self$pubchem$InChIKey != self$inchikey) {
- message("InChiKey ", self$pubchem$InChIKey, " from PubChem record does not match\n",
- "InChiKey ", self$inchikey, " retreived from ",
- attr(self$inchikey, "source"))
+ if (length(self$inchikey) > 1) {
+ message("InChIKey ", self$inchikey, " retreived from ",
+ attr(self$inchikey, "source"),
+ " has length > 1, using PubChem InChIKey")
+ self$inchikey <- self$pubchem$InChIKey
+ attr(self$inchikey, "source") <- "pubchem"
+ } else {
+ if (self$pubchem$InChIKey != self$inchikey) {
+ message("InChiKey ", self$pubchem$InChIKey, " from PubChem record does not match\n",
+ "InChiKey ", self$inchikey, " retreived from ",
+ attr(self$inchikey, "source"))
+ } else {
+ attr(self$inchikey, "source") <- c(attr(self$inchikey, "source"), "pubchem")
+ }
}
}
},
@@ -319,7 +340,7 @@ plot.chent = function(x, ...) {
#' An R6 class for pesticidal active ingredients and associated data
#'
#' The class is initialised with an identifier which is generally an ISO common name.
-#' Additional chemical information is retrieved from the internet.
+#' Additional chemical information is retrieved from the internet if available.
#'
#' @docType class
#' @importFrom R6 R6Class
@@ -335,26 +356,60 @@ pai <- R6Class("pai",
public <- list(
iso = NULL,
alanwood = NULL,
- initialize = function(iso, identifier = iso, smiles = NULL, alanwood = TRUE,
- pubchem = TRUE, rdkit = TRUE, chyaml = TRUE) {
+ initialize = function(iso, identifier = iso,
+ smiles = NULL, smiles_source = 'user',
+ inchikey = NULL, inchikey_source = 'user',
+ alanwood = TRUE,
+ pubchem = TRUE, pubchem_from = 'auto',
+ rdkit = TRUE, chyaml = TRUE)
+ {
+ if (!is.null(inchikey)) {
+ self$inchikey = inchikey
+ attr(self$inchikey, "source") <- "user"
+ }
if (!missing(iso) & alanwood) {
message("alanwood.net:")
- self$alanwood = webchem::aw_query(identifier, type = "commonname")[[1]]
- if (is.na(self$alanwood[1])) {
+ aw_result = webchem::aw_query(identifier, type = "commonname")
+
+ # Use first element of list, as we passed a query of length one
+ if (is.na(aw_result[[1]][1])) {
message("Common name ", identifier, " is not known at www.alanwood.net, trying PubChem")
} else {
+ self$alanwood = aw_result[[1]]
self$iso = self$alanwood$cname
attr(self$iso, "source") <- "alanwood"
attr(self$iso, "status") <- self$alanwood$status
- self$inchikey = self$alanwood$inchikey
- attr(self$inchikey, "source") <- "alanwood"
+ aw_ik = self$alanwood$inchikey
+ if (length(aw_ik) == 1 && nchar(aw_ik) == 27 && !is.na(aw_ik)) {
+ if (is.null(self$inchikey)) {
+ self$inchikey = self$alanwood$inchikey
+ attr(self$inchikey, "source") <- "alanwood"
+ } else {
+ if (aw_ik == self$inchikey) {
+ attr(self$inchikey, "source") = c(attr(self$inchikey, "source"), "alanwood")
+ } else {
+ warning("InChIKey ", self$inchikey, " differs from ", aw_ik, " obtained from alanwood.net")
+ }
+ }
+ }
+ }
+ }
+
+ # Set pubchem_from if not specified
+ if (pubchem_from == 'auto') {
+ pubchem_from = 'name'
+ if (!is.null(self$inchikey)) {
+ pubchem_from = 'inchikey'
}
}
- super$initialize(identifier = identifier, smiles = smiles,
- pubchem = pubchem, rdkit = rdkit, chyaml = chyaml)
+ super$initialize(identifier = identifier,
+ smiles = smiles, smiles_source = smiles_source,
+ inchikey = self$inchikey,
+ pubchem = pubchem, pubchem_from = pubchem_from,
+ rdkit = rdkit, chyaml = chyaml)
invisible(self)
}
diff --git a/man/pai.Rd b/man/pai.Rd
index 9c77e1e..c9d9a02 100644
--- a/man/pai.Rd
+++ b/man/pai.Rd
@@ -10,7 +10,7 @@ pai
}
\description{
The class is initialised with an identifier which is generally an ISO common name.
-Additional chemical information is retrieved from the internet.
+Additional chemical information is retrieved from the internet if available.
}
\section{Fields}{
diff --git a/test.log b/test.log
index 158cb5a..a4eece6 100644
--- a/test.log
+++ b/test.log
@@ -8,15 +8,6 @@ Initialize Python Version 2.7.9 (default, Jun 29 2016, 13:11:10)
Trying to get chemical information from RDKit using user SMILES
CCCCCCCCO
......
-Generation of pai objects: alanwood.net:
-Querying glyphosate.html
-PubChem:
-http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/cids/JSON
-Found 1 entries in PubChem, using the first one.
-http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/property/MolecularFormula,MolecularWeight,CanonicalSMILES,IsomericSMILES,InChI,InChIKey,IUPACName,XLogP,ExactMass,MonoisotopicMass,TPSA,Complexity,Charge,HBondDonorCount,HBondAcceptorCount,RotatableBondCount,HeavyAtomCount,IsotopeAtomCount,AtomStereoCount,DefinedAtomStereoCount,UndefinedAtomStereoCount,BondStereoCount,DefinedBondStereoCount,UndefinedBondStereoCount,CovalentUnitCount,Volume3D,XStericQuadrupole3D,YStericQuadrupole3D,ZStericQuadrupole3D,FeatureCount3D,FeatureAcceptorCount3D,FeatureDonorCount3D,FeatureAnionCount3D,FeatureCationCount3D,FeatureRingCount3D,FeatureHydrophobeCount3D,ConformerModelRMSD3D,EffectiveRotorCount3D,ConformerCount3D,Fingerprint2D/JSON
-http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
-Trying to get chemical information from RDKit using PubChem_Canonical SMILES
-C(C(=O)O)NCP(=O)(O)O
-.......
+Generation of pai objects: .........
DONE ===========================================================================
diff --git a/tests/testthat/test_pai.R b/tests/testthat/test_pai.R
index 1718a9a..a0b7704 100644
--- a/tests/testthat/test_pai.R
+++ b/tests/testthat/test_pai.R
@@ -1,23 +1,21 @@
+# For manual use of this file
+require(chents)
+require(testthat)
+
context("Generation of pai objects")
-glyphosate <- pai$new("glyphosate", chyaml = FALSE)
-
-test_that("a pai object is generated from its ISO common name", {
+test_that("a pai object is correctly generated from an ambiguous name, with warning", {
+ expect_warning(glyphosate <- pai$new("glyphosate", chyaml = FALSE))
+ expect_warning(pai$new("benzalkonium chloride", chyaml = FALSE))
+
expect_equivalent(glyphosate$alanwood$cas, "1071-83-6")
expect_equivalent(glyphosate$alanwood$formula, "C3H8NO5P")
expect_equivalent(glyphosate$alanwood$iupac_name, "N-(phosphonomethyl)glycine")
expect_equal(names(glyphosate$identifier), "glyphosate")
ik = "XDDAORKBJWWYJS-UHFFFAOYSA-N"
- attr(ik, "source") <- "alanwood"
+ attr(ik, "source") <- c("alanwood", "pubchem")
expect_equal(glyphosate$inchikey, ik)
-})
-
-test_that("a pai object is generated from an ambiguous name", {
- deltamethrin <- pai$new("deltamethrin", chyaml = FALSE)
-})
-
-test_that("PubChem information was added via webchem", {
expect_equivalent(round(glyphosate$mw, 2), 169.07)
- smiles <- "C(C(=O)O)NCP(=O)(O)O"
+ smiles <- "C(C(=O)O)[NH2+]CP(=O)(O)[O-]"
expect_equal(glyphosate$smiles[["PubChem_Canonical"]], smiles)
})

Contact - Imprint