From 81672751d5b23ed73c9b05bb02c52f0ee854de77 Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Tue, 27 Sep 2016 15:08:20 +0200 Subject: PythonInR optional, adapt to current webchem --- ChangeLog | 12 ++++++ DESCRIPTION | 25 ++++++++---- NAMESPACE | 8 +--- R/chent.R | 103 ++++++++++++++++++++++++++++++-------------------- man/chent.Rd | 7 ++-- man/draw_svg.chent.Rd | 2 +- man/pai.Rd | 2 +- man/plot.chent.Rd | 2 +- man/pp.Rd | 2 +- man/print.chent.Rd | 2 +- man/print.pai.Rd | 2 +- test.log | 22 +++++++---- 12 files changed, 117 insertions(+), 72 deletions(-) diff --git a/ChangeLog b/ChangeLog index 17db91f..f6450ed 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +commit f48d2ddb338c3050b3b6ccaed636366a546c5275 +Author: Johannes Ranke +Date: 2016-09-27 15:08:20 +0200 + + PythonInR (and RDkit) optional, adapt to current webchem + +commit 26748e386a386f8459edb4f061012f099d34de7d +Author: Johannes Ranke +Date: 2015-10-16 15:35:33 +0200 + + Some improvements, especially in loading chyaml + commit a892d1dc3c2a3724063f4d671f7f261ff5392884 Author: Johannes Ranke Date: 2015-10-02 13:01:38 +0200 diff --git a/DESCRIPTION b/DESCRIPTION index 05d2e91..639cfe2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,19 +1,28 @@ Package: chents Type: Package Title: Chemical Entities as R Objects -Version: 0.2-2 -Date: 2015-10-15 +Version: 0.2-3 +Date: 2016-09-27 Authors@R: c(person("Johannes", "Ranke", role = c("aut", "cre", "cph"), email = "jranke@uni-bremen.de")) -Description: Utilities for dealing with chemical entities and associated data as R objects. - If Python and RDKit are installed and configured for use with 'PythonInR', - some basic chemoinformatics functions like the calculation of molecular weight - and plotting of chemical structures in R graphics are available. -Imports: webchem, R6, grImport, PythonInR, yaml -Suggests: knitr, testthat +Description: Utilities for dealing with chemical entities and associated + data as R objects. If Python and RDKit are installed and configured for use + with 'PythonInR', some basic chemoinformatics functions like the calculation + of molecular weight and plotting of chemical structures in R graphics are + available. +Imports: + webchem, + R6, + grImport, + yaml +Suggests: + knitr, + testthat, + PythonInR License: GPL LazyLoad: yes LazyData: yes Encoding: UTF-8 VignetteBuilder: knitr URL: http://cgit.jrwb.de/chents +RoxygenNote: 5.0.1 diff --git a/NAMESPACE b/NAMESPACE index 2cbde5a..987a658 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,4 @@ -# Generated by roxygen2 (4.1.1): do not edit by hand +# Generated by roxygen2: do not edit by hand S3method(plot,chent) S3method(print,chent) @@ -7,15 +7,11 @@ export(chent) export(draw_svg.chent) export(pai) export(pp) -importFrom(PythonInR,pyConnect) -importFrom(PythonInR,pyExec) -importFrom(PythonInR,pyExecg) -importFrom(PythonInR,pyImport) -importFrom(PythonInR,pyIsConnected) importFrom(R6,R6Class) importFrom(grImport,PostScriptTrace) importFrom(grImport,grid.picture) importFrom(grImport,readPicture) +importFrom(utils,head) importFrom(webchem,cid_compinfo) importFrom(webchem,get_cid) importFrom(yaml,yaml.load_file) diff --git a/R/chent.R b/R/chent.R index 12ef15d..dd51798 100644 --- a/R/chent.R +++ b/R/chent.R @@ -1,4 +1,4 @@ -# Copyright (C) 2015 Johannes Ranke +# Copyright (C) 2016 Johannes Ranke # Contact: jranke@uni-bremen.de # This file is part of the R package chents @@ -19,7 +19,7 @@ #' #' The class is initialised with an identifier. Chemical information is retrieved from #' the internet. Additionally, it can be generated using RDKit if RDKit and its -#' python bindings are installed. +#' python bindings are installed and configured for use with PythonInR. #' #' @docType class #' @export @@ -27,14 +27,14 @@ #' @importFrom R6 R6Class #' @importFrom webchem get_cid cid_compinfo #' @importFrom grImport PostScriptTrace readPicture -#' @importFrom PythonInR pyIsConnected pyConnect pyImport pyExec pyExecg #' @importFrom yaml yaml.load_file #' @field identifier The identifier that was used to initiate the object, with attribute 'source' #' @field inchikey InChI Key, with attribute 'source' #' @field smiles SMILES code, with attribute 'source' #' @field mw Molecular weight, with attribute 'source' #' @field pubchem List of information retreived from PubChem -#' @field rdkit List of information obtained with RDKit +#' @field rdkit List of information obtained with RDKit, if installed and +#' configured for use with PythonInR #' @field Picture Graph as a \code{\link{picture}} object obtained using grImport #' @field chyaml List of information obtained from a YAML file #' @field degradation List of degradation endpoints @@ -67,15 +67,17 @@ chent <- R6Class("chent", } if (rdkit) { - if (is.null(self$smiles)) { - stop("RDKit needs a SMILES code") - } else { - message("Trying to get chemical information from RDKit using ", - names(self$smiles)[1], " SMILES\n", - self$smiles[1]) - self$get_rdkit() - self$mw <- self$rdkit$mw - attr(self$mw, "source") <- "rdkit" + if(requireNamespace("PythonInR")) { + if (is.null(self$smiles)) { + stop("RDKit needs a SMILES code") + } else { + message("Trying to get chemical information from RDKit using ", + names(self$smiles)[1], " SMILES\n", + self$smiles[1]) + self$get_rdkit() + self$mw <- self$rdkit$mw + attr(self$mw, "source") <- "rdkit" + } } } @@ -89,19 +91,21 @@ chent <- R6Class("chent", if (missing(identifier)) identifier <- self$identifier pubchem_cids = webchem::get_cid(identifier) - if (is.na(pubchem_cids[1])) { - stop("Query ", identifier, " did not give results at PubChem") - } else { - message("Found ", length(pubchem_cids), " entries in PubChem, using the first one.") - self$get_pubchem(pubchem_cids[1]) - } + if (is.na(pubchem_cids[1])) { + message("Query ", identifier, " did not give results at PubChem") + } else { + message("Found ", length(pubchem_cids), " entries in PubChem, using the first one.") + self$get_pubchem(pubchem_cids[1]) + } }, get_pubchem = function(pubchem_cid) { - self$pubchem = webchem::cid_compinfo(pubchem_cid) + self$pubchem = as.list(webchem::pc_prop(pubchem_cid, from = "cid")) + self$pubchem$synonyms = webchem::pc_synonyms(pubchem_cid, from ="cid")[[1]] + + self$smiles["PubChem_Canonical"] <- self$pubchem$CanonicalSMILES - self$smiles["PubChem_Canonical"] <- self$pubchem$CanonicalSmiles - if (self$pubchem$IsomericSmiles != self$pubchem$CanonicalSmiles) { - self$smiles["PubChem_Isomeric"] <- self$pubchem$IsomericSmiles + if (self$pubchem$IsomericSMILES != self$pubchem$CanonicalSMILES) { + self$smiles["PubChem_Isomeric"] <- self$pubchem$IsomericSMILES } self$mw = as.numeric(self$pubchem$MolecularWeight) @@ -119,18 +123,20 @@ chent <- R6Class("chent", } }, get_rdkit = function() { + if (!requireNamespace("PythonInR")) + stop("PythonInR can not be loaded") id <- names(self$identifier) - if (!pyIsConnected()) { - pyConnect() + if (!PythonInR::pyIsConnected()) { + PythonInR::pyConnect() } - try_rdkit <- try(pyImport("Chem", from = "rdkit")) + try_rdkit <- try(PythonInR::pyImport("Chem", from = "rdkit")) if (inherits(try_rdkit, "try-error")) { message("Could not import RDKit in Python session") } else { self$rdkit <- list() - pyImport("Descriptors", from = "rdkit.Chem") - pyExec(paste0("mol = Chem.MolFromSmiles('", self$smiles[1], "')")) - self$rdkit$mw <- pyExecg("mw = Descriptors.MolWt(mol)", "mw") + PythonInR::pyImport("Descriptors", from = "rdkit.Chem") + PythonInR::pyExec(paste0("mol = Chem.MolFromSmiles('", self$smiles[1], "')")) + self$rdkit$mw <- PythonInR::pyExecg("mw = Descriptors.MolWt(mol)", "mw") if (!is.null(self$mw)) { if (round(self$rdkit$mw, 1) != round(self$mw, 1)) { message("RDKit mw is ", self$rdkit$mw) @@ -139,11 +145,11 @@ chent <- R6Class("chent", } # Create a grImport Picture - pyImport("Draw", from = "rdkit.Chem") + PythonInR::pyImport("Draw", from = "rdkit.Chem") psfile <- tempfile(fileext = ".ps") xmlfile <- tempfile(fileext = ".xml") cmd <- paste0("Draw.MolToFile(mol, '", psfile, "')") - pyExec(cmd) + PythonInR::pyExec(cmd) PostScriptTrace(psfile, outfilename = xmlfile) unlink(paste0("capture", basename(psfile))) self$Picture <- readPicture(xmlfile) @@ -209,7 +215,8 @@ chent <- R6Class("chent", max_occurrence = max_occurrence, comment = comment, source = source, - pages = pages)) + pages = pages, + stringsAsFactors = FALSE)) }, soil_degradation_endpoints = data.frame(destination = character(0), DT50 = numeric(0), @@ -223,6 +230,17 @@ chent <- R6Class("chent", self$soil_degradation_endpoints[i, c("destination", "comment", "pages")] <- c(destination, comment, pages) self$soil_degradation_endpoints[i, "DT50"] <- DT50 + }, + ff = data.frame(from = character(0), to = character(0), ff = numeric(0), + comment = character(0), pages = character(0), + stringsAsFactors = FALSE), + add_ff = function(from = "parent", to, ff = 1, comment = "", pages = NA) { + i <- nrow(self$ff) + 1 + if (from != "parent") { + if (!exists(from, self$TPs)) stop(from, " was not found in TPs") + } + if (!exists(to, self$TPs)) stop(to, " was not found in TPs") + self$ff[i, ] <- c(from, to, ff, comment, pages) } ) ) @@ -231,6 +249,7 @@ chent <- R6Class("chent", #' #' @param x The chent object to be printed #' @param ... Further arguments for compatibility with the S3 method +#' @importFrom utils head #' @export print.chent = function(x, ...) { cat("\n") @@ -239,15 +258,14 @@ print.chent = function(x, ...) { cat ("SMILES string $smiles:\n") print(x$smiles) if (!is.null(x$mw)) cat ("Molecular weight $mw:", round(x$mw, 1), "\n") - if (!is.null(x$pubchem)) { - cat ("PubChem synonyms (first 10):\n") + if (!is.null(x$pubchem$synonyms)) { + cat ("PubChem synonyms (up to 10):\n") print(head(x$pubchem$synonyms, n = 10L)) } } #' Draw SVG graph from a chent object using RDKit #' -#' @importFrom PythonInR pyIsConnected pyConnect pyImport pyExec #' @param x The chent object to be plotted #' @param width The desired width in pixels #' @param height The desired height in pixels @@ -257,19 +275,19 @@ print.chent = function(x, ...) { draw_svg.chent = function(x, width = 300, height = 150, filename = paste0(names(x$identifier), ".svg"), subdir = "svg") { - if (!pyIsConnected()) { - pyConnect() + if (!PythonInR::pyIsConnected()) { + PythonInR::pyConnect() } - try_rdkit <- try(pyImport("Chem", from = "rdkit")) + try_rdkit <- try(PythonInR::pyImport("Chem", from = "rdkit")) if (inherits(try_rdkit, "try-error")) { message("Could not import RDKit in Python session") } else { if (!dir.exists(subdir)) dir.create(subdir) - pyExec(paste0("mol = Chem.MolFromSmiles('", x$smiles, "')")) - pyImport("Draw", from = "rdkit.Chem") + PythonInR::pyExec(paste0("mol = Chem.MolFromSmiles('", x$smiles, "')")) + PythonInR::pyImport("Draw", from = "rdkit.Chem") cmd <- paste0("Draw.MolToFile(mol, '", file.path(subdir, filename), "', size = (", width, ", ", height, "))") - pyExec(cmd) + PythonInR::pyExec(cmd) } } @@ -309,7 +327,7 @@ pai <- R6Class("pai", if (!missing(iso) & alanwood) { message("alanwood.net:") - self$alanwood = webchem::alanwood(identifier, type = "commonname") + self$alanwood = webchem::aw_query(identifier, type = "commonname")[[1]] if (is.na(self$alanwood[1])) { message("Common name ", identifier, " is not known at www.alanwood.net, trying PubChem") } else { @@ -320,6 +338,7 @@ pai <- R6Class("pai", attr(self$inchikey, "source") <- "alanwood" } } + super$initialize(identifier = identifier, smiles = smiles, pubchem = pubchem, rdkit = rdkit, chyaml = chyaml) diff --git a/man/chent.Rd b/man/chent.Rd index 00516b9..e2a0c77 100644 --- a/man/chent.Rd +++ b/man/chent.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/chent.R \docType{class} \name{chent} @@ -11,7 +11,7 @@ chent \description{ The class is initialised with an identifier. Chemical information is retrieved from the internet. Additionally, it can be generated using RDKit if RDKit and its -python bindings are installed. +python bindings are installed and configured for use with PythonInR. } \section{Fields}{ @@ -26,7 +26,8 @@ python bindings are installed. \item{\code{pubchem}}{List of information retreived from PubChem} -\item{\code{rdkit}}{List of information obtained with RDKit} +\item{\code{rdkit}}{List of information obtained with RDKit, if installed and +configured for use with PythonInR} \item{\code{Picture}}{Graph as a \code{\link{picture}} object obtained using grImport} diff --git a/man/draw_svg.chent.Rd b/man/draw_svg.chent.Rd index 87234a1..c78763f 100644 --- a/man/draw_svg.chent.Rd +++ b/man/draw_svg.chent.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/chent.R \name{draw_svg.chent} \alias{draw_svg.chent} diff --git a/man/pai.Rd b/man/pai.Rd index c366991..9c77e1e 100644 --- a/man/pai.Rd +++ b/man/pai.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/chent.R \docType{class} \name{pai} diff --git a/man/plot.chent.Rd b/man/plot.chent.Rd index 3fd2ac6..a1f618a 100644 --- a/man/plot.chent.Rd +++ b/man/plot.chent.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/chent.R \name{plot.chent} \alias{plot.chent} diff --git a/man/pp.Rd b/man/pp.Rd index 5be85bf..6adffb8 100644 --- a/man/pp.Rd +++ b/man/pp.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/chent.R \docType{class} \name{pp} diff --git a/man/print.chent.Rd b/man/print.chent.Rd index 56f04f5..8f9dfec 100644 --- a/man/print.chent.Rd +++ b/man/print.chent.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/chent.R \name{print.chent} \alias{print.chent} diff --git a/man/print.pai.Rd b/man/print.pai.Rd index d7ee332..6206ea5 100644 --- a/man/print.pai.Rd +++ b/man/print.pai.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/chent.R \name{print.pai} \alias{print.pai} diff --git a/test.log b/test.log index 049ffe1..e2dbdd5 100644 --- a/test.log +++ b/test.log @@ -1,15 +1,23 @@ Loading chents Loading required package: testthat -Loading required package: methods -Initialize Python Version 2.7.9 (default, Mar 1 2015, 13:01:26) +Initialize Python Version 2.7.9 (default, Jun 29 2016, 13:11:10) [GCC 4.9.2] Testing chents -Generation of chent objects : ...... -Generation of pai objects : Querying glyphosate.html -http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?retmax=100000&db=pccompound&term=glyphosate -http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?retmax=100000&db=pccompound&ID=3496 +Generation of chent objects: Trying to get chemical information from RDKit using user SMILES +CCCCCCCCO +...... +Generation of pai objects: alanwood.net: +Querying glyphosate.html +PubChem: +http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/glyphosate/cids/JSON +No encoding supplied: defaulting to UTF-8. +Found 1 entries in PubChem, using the first one. +http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/property/MolecularFormula,MolecularWeight,CanonicalSMILES,IsomericSMILES,InChI,InChIKey,IUPACName,XLogP,ExactMass,MonoisotopicMass,TPSA,Complexity,Charge,HBondDonorCount,HBondAcceptorCount,RotatableBondCount,HeavyAtomCount,IsotopeAtomCount,AtomStereoCount,DefinedAtomStereoCount,UndefinedAtomStereoCount,BondStereoCount,DefinedBondStereoCount,UndefinedBondStereoCount,CovalentUnitCount,Volume3D,XStericQuadrupole3D,YStericQuadrupole3D,ZStericQuadrupole3D,FeatureCount3D,FeatureAcceptorCount3D,FeatureDonorCount3D,FeatureAnionCount3D,FeatureCationCount3D,FeatureRingCount3D,FeatureHydrophobeCount3D,ConformerModelRMSD3D,EffectiveRotorCount3D,ConformerCount3D,Fingerprint2D/JSON +http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON +Trying to get chemical information from RDKit using PubChem_Canonical SMILES +C(C(=O)O)NCP(=O)(O)O ........ -DONE +DONE =========================================================================== -- cgit v1.2.1