diff options
| author | Ranke Johannes <johannes.ranke@agroscope.admin.ch> | 2025-10-22 10:35:37 +0200 |
|---|---|---|
| committer | Ranke Johannes <johannes.ranke@agroscope.admin.ch> | 2025-10-22 11:57:27 +0200 |
| commit | adc69bd4e1543a229fbee543940f8b92fd499682 (patch) | |
| tree | dcf08c3387f8e7d45290a1ce735896944376b8f3 | |
| parent | d6062331f8b3eacab729c6c2bfca0d24363d074c (diff) | |
Adapt to new SMILES names at PubChem
This may break some code using the old SMILES names "PubChem_Canonical"
and "PubChem_Isomeric" instead of the new SMILES names "PubChem"
(including isotopic and stereochemical information) and
"PubChem_Connectivity" (only containing connectivity, but still
canonical).
Further changes in this commit:
- A fix to the test target in the makefile
- Use markdown syntax for link to objects from other packages in the
documentation
- Adapt the tests
- Increase test coverage a bit
| -rw-r--r-- | DESCRIPTION | 6 | ||||
| -rw-r--r-- | GNUmakefile | 2 | ||||
| -rw-r--r-- | NEWS.md | 4 | ||||
| -rw-r--r-- | R/chent.R | 62 | ||||
| -rw-r--r-- | log/build.log | 3 | ||||
| -rw-r--r-- | log/check.log | 26 | ||||
| -rw-r--r-- | log/test.log | 6 | ||||
| -rw-r--r-- | man/chent.Rd | 4 | ||||
| -rw-r--r-- | man/pai.Rd | 2 | ||||
| -rw-r--r-- | man/plot.chent.Rd | 2 | ||||
| -rw-r--r-- | man/ppp.Rd | 2 | ||||
| -rw-r--r-- | tests/testthat/test_chent.R | 13 | ||||
| -rw-r--r-- | tests/testthat/test_pai.R | 11 |
13 files changed, 77 insertions, 66 deletions
diff --git a/DESCRIPTION b/DESCRIPTION index 8b44bdc..2201b9a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: chents Type: Package Title: Chemical Entities as R Objects -Version: 0.3.7 -Date: 2024-11-28 +Version: 0.4.0 +Date: 2025-10-22 Authors@R: c(person("Johannes", "Ranke", role = c("aut", "cre", "cph"), email = "johannes.ranke@jrwb.de")) Description: Utilities for dealing with chemical entities and associated @@ -28,4 +28,4 @@ LazyData: yes Encoding: UTF-8 URL: https://pkgdown.jrwb.de/chents, https://github.com/jranke/chents Roxygen: list(markdown = TRUE, r6 = TRUE) -RoxygenNote: 7.3.2.9000 +RoxygenNote: 7.3.3 diff --git a/GNUmakefile b/GNUmakefile index 2e1ddf8..6bbb53f 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -32,7 +32,7 @@ build: roxy $(TGZ) test: build "$(RBIN)/Rscript" -e 'library(devtools); devtools::test()' 2>&1 | tee log/test.log - sed -i -e "s/\r.*\r//" test.log + sed -i -e "s/\r.*\r//" log/test.log quickcheck: build _R_CHECK_CRAN_INCOMING_REMOTE_=false "$(RBIN)/R" CMD check $(TGZ) --no-tests @@ -1,3 +1,7 @@ +## version 0.4.0 + +- R/chent.R: PubChem has changed the names of the SMILES codes they provide. The former isomeric SMILES that was incorporated in our chent objects as "PubChem_Isomeric" is now simply called SMILES, and is incorporated in our objects as "PubChem". The SMILES code formerly given as "canonical" is now termed "connectivity SMILES" because it does not contain isotopic or stereochemical specifications. In the chents object, it is now available under the name "PubChem_Connectivity". This is a breaking change, so objects generated with versions < 0.4.0 may produce errors when used with current versions. + ## version 0.3.7 - R/chent.R: Do not attempt to load a chyaml file per default, as the format of such a file and the resulting chyaml list object is not documented and would need to be inferred from its use in the pfm package. 
@@ -5,7 +5,7 @@ #' generated using RDKit if RDKit and its python bindings are installed. #' #' @export -#' @format An \code{\link{R6Class}} generator object +#' @format An [R6Class] generator object #' @importFrom R6 R6Class #' @importFrom utils URLencode #' @importFrom webchem get_cid cid_compinfo @@ -79,7 +79,7 @@ chent <- R6Class("chent", #' @field svg SVG code svg = NULL, - #' @field Picture Graph as a \code{\link{picture}} object obtained using grImport + #' @field Picture Graph as a [grImport::Picture-class] object obtained using the grImport package Picture = NULL, #' @field Pict_font_size Font size as extracted from the intermediate PostScript file @@ -123,22 +123,8 @@ chent <- R6Class("chent", } if (rdkit) { - if(rdkit_available) { - if (is.null(self$smiles)) { - message("RDKit would need a SMILES code") - } else { - available_smiles <- names(self$smiles) - smiles_preference <- c("user", "PubChem_Isomeric", "PubChem_Canonical") - smiles_preferred_i <- min(match(available_smiles, smiles_preference)) - smiles_preferred <- smiles_preference[smiles_preferred_i] - - message("Trying to get chemical information from RDKit using ", - smiles_preferred, " SMILES\n", - self$smiles[smiles_preferred]) - self$get_rdkit(template = template) - self$mw <- self$rdkit$mw - attr(self$mw, "source") <- "rdkit" - } + if (rdkit_available) { + self$get_rdkit(template = template) } else { message("RDKit is not available") } @@ -154,11 +140,11 @@ chent <- R6Class("chent", attr(self$smiles, "source") <- "user" } if (is.null(self$inchikey)) { - self$inchikey<- NA + self$inchikey <- NA attr(self$inchikey, "source") <- "user" } if (is.null(self$mw)) { - self$mw<- NA + self$mw <- NA attr(self$mw, "source") <- "user" } @@ -187,16 +173,16 @@ chent <- R6Class("chent", get_pubchem = function(pubchem_cid) { self$pubchem = as.list(webchem::pc_prop(pubchem_cid, from = "cid", properties = c("MolecularFormula", "MolecularWeight", - "CanonicalSMILES", "IsomericSMILES", + "ConnectivitySMILES", 
"SMILES", "InChI", "InChIKey", "IUPACName", "XLogP", "TPSA", "Complexity", "Charge", "HBondDonorCount", "HBondAcceptorCount"))) - self$pubchem$synonyms = webchem::pc_synonyms(pubchem_cid, from ="cid")[[1]] + self$pubchem$synonyms = webchem::pc_synonyms(pubchem_cid, from = "cid")[[1]] - self$smiles["PubChem_Canonical"] <- self$pubchem$CanonicalSMILES + self$smiles["PubChem"] <- self$pubchem$SMILES - if (self$pubchem$IsomericSMILES != self$pubchem$CanonicalSMILES) { - self$smiles["PubChem_Isomeric"] <- self$pubchem$IsomericSMILES + if (self$pubchem$SMILES != self$pubchem$ConnectivitySMILES) { + self$smiles["PubChem_Connectivity"] <- self$pubchem$ConnectivitySMILES } self$mw = as.numeric(self$pubchem$MolecularWeight) @@ -233,17 +219,29 @@ chent <- R6Class("chent", #' @description #' Get chemical information from RDKit if available get_rdkit = function(template = NULL) { - if (!rdkit_available) { - stop("RDKit is not available") - } + + if (!rdkit_available) stop("RDKit is not available") + if (is.null(self$smiles)) stop("RDKit would need a SMILES code") + + available_smiles <- names(self$smiles) + smiles_preference <- c("user", "PubChem", "PubChem_Connectivity") + smiles_preferred_i <- min(match(available_smiles, smiles_preference)) + smiles_preferred <- smiles_preference[smiles_preferred_i] + + message("Trying to get chemical information from RDKit using ", + smiles_preferred, " SMILES\n", + self$smiles[smiles_preferred]) self$rdkit <- list() self$mol <- rdkit_module$Chem$MolFromSmiles(self$smiles[1]) self$rdkit$mw <- rdkit_module$Chem$Descriptors$MolWt(self$mol) - if (!is.null(self$mw)) { + if (!is.na(self$mw)) { if (round(self$rdkit$mw, 1) != round(self$mw, 1)) { message("RDKit mw is ", self$rdkit$mw) message("mw is ", self$mw) } + } else { + self$mw <- self$rdkit$mw + attr(self$mw, "source") <- "rdkit" } # Create an SVG representation @@ -630,7 +628,7 @@ draw_svg.chent = function(x, width = 300, height = 150, #' #' @importFrom grImport grid.picture #' @param x The 
chent object to be plotted -#' @param ... Further arguments passed to \code{\link{grid.picture}} +#' @param ... Further arguments passed to [grImport::grid.picture] #' @export #' @examples #' caffeine <- chent$new("caffeine") @@ -652,7 +650,7 @@ plot.chent = function(x, ...) { #' [bcpc_query][webchem::bcpc_query]. #' #' @export -#' @format An \code{\link{R6Class}} generator object +#' @format An [R6::R6Class] generator object #' @examples #' # On Travis, we get a certificate validation error, #' # likely because the system (xenial) is so old, @@ -776,7 +774,7 @@ print.pai = function(x, ...) { #' product #' #' @export -#' @format An \code{\link{R6Class}} generator object. +#' @format An [R6::R6Class] generator object. ppp <- R6Class("ppp", public = list( diff --git a/log/build.log b/log/build.log index b6e3ba5..ff5543b 100644 --- a/log/build.log +++ b/log/build.log @@ -3,6 +3,7 @@ * checking DESCRIPTION meta-information ... OK * checking for LF line-endings in source and make files and shell scripts * checking for empty or unneeded directories +Removed empty directory ‘chents/tests/testthat/_snaps’ Omitted ‘LazyData’ from DESCRIPTION -* building ‘chents_0.3.7.tar.gz’ +* building ‘chents_0.4.0.tar.gz’ diff --git a/log/check.log b/log/check.log index 95a06de..a85d458 100644 --- a/log/check.log +++ b/log/check.log @@ -1,18 +1,17 @@ * using log directory ‘/home/agsad.admin.ch/f80868656/projects/chents/chents.Rcheck’ -* using R version 4.4.1 (2024-06-14) +* using R version 4.5.1 (2025-06-13) * using platform: x86_64-pc-linux-gnu * R was compiled by - gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 - GNU Fortran (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 -* running under: Ubuntu 22.04.5 LTS + gcc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 + GNU Fortran (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 +* running under: Ubuntu 24.04.3 LTS * using session charset: UTF-8 * using options ‘--no-tests --as-cran’ * checking for file ‘chents/DESCRIPTION’ ... OK * checking extension type ... 
Package -* this is package ‘chents’ version ‘0.3.7’ +* this is package ‘chents’ version ‘0.4.0’ * package encoding: UTF-8 -* checking CRAN incoming feasibility ... Note_to_CRAN_maintainers -Maintainer: ‘Johannes Ranke <johannes.ranke@jrwb.de>’ +* checking CRAN incoming feasibility ... OK * checking package namespace information ... OK * checking package dependencies ... OK * checking if this is a source package ... OK @@ -27,8 +26,7 @@ Maintainer: ‘Johannes Ranke <johannes.ranke@jrwb.de>’ * checking package directory ... OK * checking for future file timestamps ... OK * checking DESCRIPTION meta-information ... OK -* checking top-level files ... NOTE -Files ‘README.md’ or ‘NEWS.md’ cannot be checked without ‘pandoc’ being installed. +* checking top-level files ... OK * checking for left-over files ... OK * checking index information ... OK * checking package subdirectories ... OK @@ -46,7 +44,7 @@ Files ‘README.md’ or ‘NEWS.md’ cannot be checked without ‘pandoc’ be * checking S3 generic/method consistency ... OK * checking replacement functions ... OK * checking foreign function calls ... OK -* checking R code for possible problems ... OK +* checking R code for possible problems ... [48s/10s] OK * checking Rd files ... OK * checking Rd metadata ... OK * checking Rd line widths ... OK @@ -56,17 +54,19 @@ Files ‘README.md’ or ‘NEWS.md’ cannot be checked without ‘pandoc’ be * checking Rd \usage sections ... OK * checking Rd contents ... OK * checking for unstated dependencies in examples ... OK -* checking examples ... [8s/14s] OK +* checking examples ... [9s/15s] OK * checking for unstated dependencies in ‘tests’ ... OK * checking tests ... SKIPPED * checking PDF version of manual ... OK * checking HTML version of manual ... NOTE -Skipping checking HTML validation: no command 'tidy' found +Skipping checking HTML validation: no command 'tidy' found. 
+Please obtain a recent version of HTML Tidy by downloading a binary +release or compiling the source code from <https://www.html-tidy.org/>. * checking for non-standard things in the check directory ... OK * checking for detritus in the temp directory ... OK * DONE -Status: 2 NOTEs +Status: 1 NOTE See ‘/home/agsad.admin.ch/f80868656/projects/chents/chents.Rcheck/00check.log’ for details. diff --git a/log/test.log b/log/test.log index 3c395a1..b0ee64f 100644 --- a/log/test.log +++ b/log/test.log @@ -8,10 +8,10 @@ The following object is masked from ‘package:devtools’: test_file ✔ | F W S OK | Context -✔ | 7 | Generation of chent objects [4.0s] -✔ | 8 | Generation of pai objects [6.6s] +✔ | 8 | Generation of chent objects [4.4s] +✔ | 7 | Generation of pai objects [5.9s] ══ Results ═════════════════════════════════════════════════════════════════════ -Duration: 10.6 s +Duration: 10.2 s [ FAIL 0 | WARN 0 | SKIP 0 | PASS 15 ] diff --git a/man/chent.Rd b/man/chent.Rd index ba47feb..2da0169 100644 --- a/man/chent.Rd +++ b/man/chent.Rd @@ -4,7 +4,7 @@ \alias{chent} \title{An R6 class for chemical entities with associated data} \format{ -An \code{\link{R6Class}} generator object +An \link[R6:R6Class]{R6::R6Class} generator object } \description{ The class is initialised with an identifier. Chemical @@ -44,7 +44,7 @@ List of information retrieved from PubChem} \item{\code{svg}}{SVG code} -\item{\code{Picture}}{Graph as a \code{\link{picture}} object obtained using grImport} +\item{\code{Picture}}{Graph as a \link[grImport:Picture-class]{grImport::Picture} object obtained using the grImport package} \item{\code{Pict_font_size}}{Font size as extracted from the intermediate PostScript file} @@ -4,7 +4,7 @@ \alias{pai} \title{An R6 class for pesticidal active ingredients and associated data} \format{ -An \code{\link{R6Class}} generator object +An \link[R6:R6Class]{R6::R6Class} generator object } \description{ This class is derived from \link{chent}. 
It makes it easy diff --git a/man/plot.chent.Rd b/man/plot.chent.Rd index f2850fa..ecaa01b 100644 --- a/man/plot.chent.Rd +++ b/man/plot.chent.Rd @@ -9,7 +9,7 @@ \arguments{ \item{x}{The chent object to be plotted} -\item{...}{Further arguments passed to \code{\link{grid.picture}}} +\item{...}{Further arguments passed to \link[grImport:grid.picture]{grImport::grid.picture}} } \description{ Plot method for chent objects @@ -4,7 +4,7 @@ \alias{ppp} \title{R6 class for a plant protection product with at least one active ingredient} \format{ -An \code{\link{R6Class}} generator object. +An \link[R6:R6Class]{R6::R6Class} generator object. } \description{ Contains basic information about the active ingredients in the diff --git a/tests/testthat/test_chent.R b/tests/testthat/test_chent.R index 7594640..4546423 100644 --- a/tests/testthat/test_chent.R +++ b/tests/testthat/test_chent.R @@ -5,13 +5,18 @@ skip_if_no_rdkit <- function() { if (!chents:::rdkit_available) skip("RDKit is not available via reticulate") } -oct <- chent$new("1-octanol", smiles = "CCCCCCCCO", pubchem = FALSE, chyaml = FALSE) +oct <- chent$new("1-octanol", smiles = "CCCCCCCCO", rdkit = FALSE, pubchem = FALSE, chyaml = FALSE) + +test_that("We can initialise an object only with identifier and SMILES code", { + expect_equal(oct$identifier, c(X1.octanol = "1-octanol")) # The name of the identifier is generated using make.names() + expect_equal(oct$inchikey, structure(NA, source = "user")) + expect_equal(oct$smiles, c(user = "CCCCCCCCO")) +}) test_that("We can generate a chent object from SMILES using RDKit", { skip_if_no_rdkit() + oct$get_rdkit() expect_equivalent(round(oct$mw, 2), 130.23) - expect_equal(names(oct$identifier), "X1.octanol") - expect_equal(oct$smiles[["user"]], "CCCCCCCCO") }) test_that("We can add information retrieved from PubChem via webchem", { @@ -20,5 +25,5 @@ test_that("We can add information retrieved from PubChem via webchem", { ik = "KBPLFHHGFOOTCA-UHFFFAOYSA-N" attr(ik, "source") 
<- "pubchem" expect_equal(oct$inchikey, ik) - expect_equal(oct$smiles[["PubChem_Canonical"]], "CCCCCCCCO") + expect_equal(oct$smiles[["PubChem"]], "CCCCCCCCO") }) diff --git a/tests/testthat/test_pai.R b/tests/testthat/test_pai.R index 518fc56..f38672d 100644 --- a/tests/testthat/test_pai.R +++ b/tests/testthat/test_pai.R @@ -4,10 +4,13 @@ require(testthat) context("Generation of pai objects") -test_that("a pai object is correctly generated from an ambiguous name", { +test_that("a pai object is correctly generated", { skip_on_travis() # server certificate verification failed in curl_fetch_memory() - glyphosate <- pai$new("glyphosate", chyaml = FALSE) - expect_message(pai$new("benzalkonium chloride", chyaml = FALSE), "did not give results") + glyphosate <- pai$new("glyphosate") + + # This did not give results at BCPC in previous times, so it was used to test + # the corresponding warning. + #bc <- pai$new("benzalkonium chloride") expect_equivalent(glyphosate$bcpc$cas, "1071-83-6") expect_equivalent(glyphosate$bcpc$formula, "C3H8NO5P") @@ -18,5 +21,5 @@ test_that("a pai object is correctly generated from an ambiguous name", { expect_equal(glyphosate$inchikey, ik) expect_equivalent(round(glyphosate$mw, 2), 169.07) smiles <- "C(C(=O)O)NCP(=O)(O)O" - expect_equal(glyphosate$smiles[["PubChem_Canonical"]], smiles) + expect_equal(glyphosate$smiles[["PubChem"]], smiles) }) |
