From 2fa9b08f8c77aa1bdc982be7110d3dad1ef24da0 Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Sat, 21 Dec 2024 22:20:32 +0100 Subject: [PATCH 1/8] Update US EPA SRS URL --- R/srs.R | 6 +++--- tests/testthat/test-srs.R | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/srs.R b/R/srs.R index 49d20aaa..71f0395b 100644 --- a/R/srs.R +++ b/R/srs.R @@ -32,7 +32,7 @@ srs_query <- if (!ping_service("srs")) stop(webchem_message("service_down")) names(query) <- query from <- match.arg(from) - entity_url <- "https://cdxnodengn.epa.gov/cdx-srs-rest/" + entity_url <- "https://cdxapps.epa.gov/oms-substance-registry-services/rest-api" if (from == "cas"){ query <- as.cas(query, verbose = verbose) } @@ -55,12 +55,12 @@ srs_query <- } if (verbose) message(httr::message_for_status(response)) if (response$status_code == 200) { - text_content <- httr::content(response, "text") + text_content <- httr::content(response, "text", encoding = "utf-8") if (text_content == "[]") { if (verbose) webchem_message("not_available") return(NA) } else { - jsonlite::fromJSON(text_content) + tibble::as_tibble(jsonlite::fromJSON(text_content)) } } else { return(NA) diff --git a/tests/testthat/test-srs.R b/tests/testthat/test-srs.R index 4491d008..dd204013 100644 --- a/tests/testthat/test-srs.R +++ b/tests/testthat/test-srs.R @@ -13,7 +13,7 @@ test_that("SRS returns correct results", { expect_true(is.na(a)) expect_true(is.na(b)) expect_type(c, "list") - expect_s3_class(c$`50-00-0`, "data.frame") + expect_true(inherits(c$`50-00-0`, "data.frame")) expect_equal(c$`50-00-0`$systematicName, "Formaldehyde") expect_equal(d$aniline$systematicName, "Benzenamine") From 351282cc7f8cc50f4ba0847e98a6f1617c09139f Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Sat, 21 Dec 2024 22:21:40 +0100 Subject: [PATCH 2/8] Fix a ChEMBL test expect more list elements --- tests/testthat/test-chembl.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-chembl.R b/tests/testthat/test-chembl.R index 66d2a214..128372d2 100644 --- a/tests/testthat/test-chembl.R +++ b/tests/testthat/test-chembl.R @@ -14,7 +14,7 @@ test_that("chembl_query()", { o4 <- chembl_query("CHEMBL771355", resource = "assay") expect_type(o1, "list") - expect_equal(length(o1[[1]]), 35) + expect_equal(length(o1[[1]]), 36) expect_equal(o1m[2], "OK (HTTP 200).") expect_equal(length(o2), 2) expect_equal(o3[[1]]$entity_type, "ASSAY") From a8f95cfb2172eaf3f109740f0f9e5a21bec45f44 Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Sat, 21 Dec 2024 22:22:02 +0100 Subject: [PATCH 3/8] Tidy get_cid() documentation --- R/pubchem.R | 20 ++++++++++---------- man/get_cid.Rd | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/R/pubchem.R b/R/pubchem.R index 79275cef..b6a75d37 100644 --- a/R/pubchem.R +++ b/R/pubchem.R @@ -25,19 +25,19 @@ #' \code{}, \code{"sourceid/"} or \code{"sourceall"}.} #' \item{\code{assay}: \code{"aid"}, \code{}.} #' } -#' @details is assembled as "{\code{substructure} | -#' \code{superstructure} | \code{similarity} | \code{identity}} / {\code{smiles} -#' | \code{inchi} | \code{sdf} | \code{cid}}", e.g. +#' @details is assembled as "(\code{substructure} | +#' \code{superstructure} | \code{similarity} | \code{identity}) / (\code{smiles} +#' | \code{inchi} | \code{sdf} | \code{cid})", e.g. #' \code{from = "substructure/smiles"}. -#' @details \code{} is assembled as "\code{xref}/\{\code{RegistryID} | +#' @details \code{} is assembled as "\code{xref}/(\code{RegistryID} | #' \code{RN} | \code{PubMedID} | \code{MMDBID} | \code{ProteinGI}, #' \code{NucleotideGI} | \code{TaxonomyID} | \code{MIMID} | \code{GeneID} | -#' \code{ProbeID} | \code{PatentID}\}", e.g. \code{from = "xref/RN"} will query +#' \code{ProbeID} | \code{PatentID})", e.g. \code{from = "xref/RN"} will query #' by CAS RN. #' @details is either \code{fastformula} or it is assembled as -#' "{\code{fastidentity} | \code{fastsimilarity_2d} | \code{fastsimilarity_3d} | -#' \code{fastsubstructure} | \code{fastsuperstructure}}/{\code{smiles} | -#' \code{smarts} | \code{inchi} | \code{sdf} | \code{cid}}", e.g. +#' "(\code{fastidentity} | \code{fastsimilarity_2d} | \code{fastsimilarity_3d} | +#' \code{fastsubstructure} | \code{fastsuperstructure})/(\code{smiles} | +#' \code{smarts} | \code{inchi} | \code{sdf} | \code{cid})", e.g. #' \code{from = "fastidentity/smiles"}. #' @details \code{} is any valid PubChem Data Source ID. When #' \code{from = "sourceid/"}, the query is the ID of the substance in @@ -46,8 +46,8 @@ #' depositor names. Depositor names are not case sensitive. #' @details Depositor names and Data Source IDs can be found at #' \url{https://pubchem.ncbi.nlm.nih.gov/sources/}. -#' @details \code{} is assembled as "\code{target}/\{\code{gi} | -#' \code{proteinname} | \code{geneid} | \code{genesymbol} | \code{accession}\}", +#' @details \code{} is assembled as "\code{target}/(\code{gi} | +#' \code{proteinname} | \code{geneid} | \code{genesymbol} | \code{accession})", #' e.g. \code{from = "target/geneid"} will query by GeneID. #' @references Wang, Y., J. Xiao, T. O. Suzek, et al. 2009 PubChem: A Public #' Information System for diff --git a/man/get_cid.Rd b/man/get_cid.Rd index ff38e218..574b0a79 100644 --- a/man/get_cid.Rd +++ b/man/get_cid.Rd @@ -55,21 +55,21 @@ Valid values for the \code{from} argument depend on the \item{\code{assay}: \code{"aid"}, \code{}.} } - is assembled as "{\code{substructure} | -\code{superstructure} | \code{similarity} | \code{identity}} / {\code{smiles} - | \code{inchi} | \code{sdf} | \code{cid}}", e.g. + is assembled as "(\code{substructure} | +\code{superstructure} | \code{similarity} | \code{identity}) / (\code{smiles} + | \code{inchi} | \code{sdf} | \code{cid})", e.g. \code{from = "substructure/smiles"}. -\code{} is assembled as "\code{xref}/\{\code{RegistryID} | +\code{} is assembled as "\code{xref}/(\code{RegistryID} | \code{RN} | \code{PubMedID} | \code{MMDBID} | \code{ProteinGI}, \code{NucleotideGI} | \code{TaxonomyID} | \code{MIMID} | \code{GeneID} | -\code{ProbeID} | \code{PatentID}\}", e.g. \code{from = "xref/RN"} will query +\code{ProbeID} | \code{PatentID})", e.g. \code{from = "xref/RN"} will query by CAS RN. is either \code{fastformula} or it is assembled as -"{\code{fastidentity} | \code{fastsimilarity_2d} | \code{fastsimilarity_3d} | -\code{fastsubstructure} | \code{fastsuperstructure}}/{\code{smiles} | -\code{smarts} | \code{inchi} | \code{sdf} | \code{cid}}", e.g. +"(\code{fastidentity} | \code{fastsimilarity_2d} | \code{fastsimilarity_3d} | +\code{fastsubstructure} | \code{fastsuperstructure})/(\code{smiles} | +\code{smarts} | \code{inchi} | \code{sdf} | \code{cid})", e.g. \code{from = "fastidentity/smiles"}. \code{} is any valid PubChem Data Source ID. When @@ -82,8 +82,8 @@ depositor names. Depositor names are not case sensitive. Depositor names and Data Source IDs can be found at \url{https://pubchem.ncbi.nlm.nih.gov/sources/}. -\code{} is assembled as "\code{target}/\{\code{gi} | -\code{proteinname} | \code{geneid} | \code{genesymbol} | \code{accession}\}", +\code{} is assembled as "\code{target}/(\code{gi} | +\code{proteinname} | \code{geneid} | \code{genesymbol} | \code{accession})", e.g. \code{from = "target/geneid"} will query by GeneID. } \note{ From 75aaf12d7860427dc1f4f0ed1a1f32102e654efd Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Sat, 21 Dec 2024 22:25:31 +0100 Subject: [PATCH 4/8] Update ChemSpider URL for validating inchikeys --- R/utils.R | 64 +++++++++++++++++++++++++++++-------------- man/is.inchikey.Rd | 5 ++++ man/is.inchikey_cs.Rd | 5 +++- 3 files changed, 53 insertions(+), 21 deletions(-) diff --git a/R/utils.R b/R/utils.R index e02b7872..97376e82 100644 --- a/R/utils.R +++ b/R/utils.R @@ -11,6 +11,9 @@ #' @param x character; input InChIKey #' @param type character; How should be checked? Either, by format (see above) #' ('format') or by ChemSpider ('chemspider'). +#' @param apikey character; your API key. If NULL (default), +#' \code{cs_check_key()} will look for it in .Renviron or .Rprofile. Only +#' used when `type = "chemspider"`. #' @param verbose logical; print messages during processing to console? #' @return a logical #' @@ -31,17 +34,23 @@ #' is.inchikey('BQJCRHHNABKAKU/KBQPJGBKSA/N') #' is.inchikey('BQJCRHHNABKAKU-KBQPJGBKXA-N') #' is.inchikey('BQJCRHHNABKAKU-KBQPJGBKSB-N') -is.inchikey = function(x, type = c('format', 'chemspider'), - verbose = getOption("verbose")) { +is.inchikey = function( + x, + type = c('format', 'chemspider'), + apikey = NULL, + verbose = getOption("verbose") + ) { # x <- 'BQJCRHHNABKAKU-KBQPJGBKSA-N' if (length(x) > 1) { stop('Cannot handle multiple input strings.') } type <- match.arg(type) - out <- switch(type, - format = is.inchikey_format(x, verbose = verbose), - chemspider = is.inchikey_cs(x, verbose = verbose)) + out <- switch( + type, + format = is.inchikey_format(x, verbose = verbose), + chemspider = is.inchikey_cs(x, apikey = apikey, verbose = verbose) + ) return(out) } @@ -49,6 +58,8 @@ is.inchikey = function(x, type = c('format', 'chemspider'), #' Check if input is a valid inchikey using ChemSpider API #' #' @param x character; input string +#' @param apikey character; your API key. If NULL (default), +#' \code{cs_check_key()} will look for it in .Renviron or .Rprofile. #' @param verbose logical; print messages during processing to console? #' @return a logical #' @@ -65,9 +76,15 @@ is.inchikey = function(x, type = c('format', 'chemspider'), #' is.inchikey_cs('BQJCRHHNABKAKU-KBQPJGBKXA-N') #' is.inchikey_cs('BQJCRHHNABKAKU-KBQPJGBKSB-N') #' } -is.inchikey_cs <- function(x, verbose = getOption("verbose")){ - - if (!ping_service("cs_web")) stop(webchem_message("service_down")) +is.inchikey_cs <- function( + x, + apikey = NULL, + verbose = getOption("verbose") + ){ + if (is.null(apikey)) { + apikey <- cs_check_key() + } + if (!ping_service("cs")) stop(webchem_message("service_down")) if (length(x) > 1) { stop('Cannot handle multiple input strings.') @@ -76,13 +93,20 @@ is.inchikey_cs <- function(x, verbose = getOption("verbose")){ if (verbose) webchem_message("na") return(NA) } - baseurl <- 'http://www.chemspider.com/InChI.asmx/IsValidInChIKey?' - qurl <- paste0(baseurl, 'inchi_key=', x) - webchem_sleep(type = 'scrape') + qurl <- 'https://api.rsc.org/compounds/v1/tools/validate/inchikey' + headers <- c( + "Accept" = "application/json", + "Content-Type" = "application/json", + "apikey" = apikey + ) + body <- list("inchikey" = x) |> jsonlite::toJSON(auto_unbox = TRUE) + webchem_sleep(type = 'API') if (verbose) webchem_message("query", x, appendLF = FALSE) - res <- try(httr::RETRY("GET", - qurl, - httr::user_agent(webchem_url()), + res <- try(httr::RETRY("POST", + url = qurl, + httr::add_headers(.headers = headers), + body = body, + encode = "json", terminate_on = 404, quiet = TRUE), silent = TRUE) if (inherits(res, "try-error")) { @@ -91,13 +115,13 @@ is.inchikey_cs <- function(x, verbose = getOption("verbose")){ } if (verbose) message(httr::message_for_status(res)) if (res$status_code == 200){ - h <- xml2::read_xml(res) - out <- as.logical(xml_text(h)) - return(out) - } - else { - return(NA) + out <- as.logical(httr::content(res)) + } else if (res$status_code == 400) { + out <- FALSE + } else { + out <- NA } + return(out) } diff --git a/man/is.inchikey.Rd b/man/is.inchikey.Rd index e51f6c1e..eac9533d 100644 --- a/man/is.inchikey.Rd +++ b/man/is.inchikey.Rd @@ -7,6 +7,7 @@ is.inchikey( x, type = c("format", "chemspider"), + apikey = NULL, verbose = getOption("verbose") ) } @@ -16,6 +17,10 @@ is.inchikey( \item{type}{character; How should be checked? Either, by format (see above) ('format') or by ChemSpider ('chemspider').} +\item{apikey}{character; your API key. If NULL (default), + \code{cs_check_key()} will look for it in .Renviron or .Rprofile. Only +used when `type = "chemspider"`.} + \item{verbose}{logical; print messages during processing to console?} } \value{ diff --git a/man/is.inchikey_cs.Rd b/man/is.inchikey_cs.Rd index 6ead4680..93b588cb 100644 --- a/man/is.inchikey_cs.Rd +++ b/man/is.inchikey_cs.Rd @@ -4,11 +4,14 @@ \alias{is.inchikey_cs} \title{Check if input is a valid inchikey using ChemSpider API} \usage{ -is.inchikey_cs(x, verbose = getOption("verbose")) +is.inchikey_cs(x, apikey = NULL, verbose = getOption("verbose")) } \arguments{ \item{x}{character; input string} +\item{apikey}{character; your API key. If NULL (default), +\code{cs_check_key()} will look for it in .Renviron or .Rprofile.} + \item{verbose}{logical; print messages during processing to console?} } \value{ From def99afa004a51304e8c9b714ec7ca18821b9cea Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Sat, 21 Dec 2024 22:44:10 +0100 Subject: [PATCH 5/8] Use vcr when testing is.inchikey() with the new ChemSpider URL --- DESCRIPTION | 4 +- tests/fixtures/is_inchikey_cs_1.yml | 139 ++++++++++++++++++++++++++++ tests/fixtures/is_inchikey_cs_2.yml | 133 ++++++++++++++++++++++++++ tests/testthat/test-utils.R | 12 ++- 4 files changed, 283 insertions(+), 5 deletions(-) create mode 100644 tests/fixtures/is_inchikey_cs_1.yml create mode 100644 tests/fixtures/is_inchikey_cs_2.yml diff --git a/DESCRIPTION b/DESCRIPTION index 1eda32f1..aef2bd33 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,8 +50,8 @@ Suggests: rmarkdown, plot.matrix, usethis, - vcr -RoxygenNote: 7.2.3 + vcr (>= 0.6.0) +RoxygenNote: 7.3.2 VignetteBuilder: knitr Config/testthat/edition: 3 Config/testthat/parallel: true diff --git a/tests/fixtures/is_inchikey_cs_1.yml b/tests/fixtures/is_inchikey_cs_1.yml new file mode 100644 index 00000000..26d94e37 --- /dev/null +++ b/tests/fixtures/is_inchikey_cs_1.yml @@ -0,0 +1,139 @@ +http_interactions: +- request: + method: post + uri: https://api.rsc.org/compounds/v1/filter/name + body: + encoding: '' + string: '{"name":"triclosan","orderBy":"recordId","orderDirection":"ascending"}' + headers: + Accept: application/json, text/xml, application/xml, */* + Content-Type: '' + apikey: <> + response: + status: + status_code: 200 + category: Success + reason: OK + message: 'Success: (200) OK' + headers: + content-type: application/json; charset=utf-8 + content-length: '50' + date: Sat, 21 Dec 2024 21:31:09 GMT + x-amzn-requestid: ecdd06c9-3b75-415d-b634-a5886580e2df + x-amz-apigw-id: DKUAuGcNLPEEa9A= + x-amzn-trace-id: Root=1-6767339d-3990071163dea691554eec92;Parent=5f382715e87ab0be;Sampled=0;Lineage=1:29310d16:0 + x-cache: Miss from cloudfront + via: 1.1 6164863a8032b4b19e42aff8915017e2.cloudfront.net (CloudFront) + x-amz-cf-pop: BUD50-P1 + x-amz-cf-id: u8vA6ESwF_LdnEeI7Ffotn6LXdU7DgmmKdNieQ0zx2k3mMxG9bUOpg== + body: + encoding: '' + file: no + string: '{"queryId":"7ea79f76-b7c0-43af-bf6f-8e0f5a003bef"}' + recorded_at: 2024-12-21 21:31:14 GMT + recorded_with: vcr/1.6.0.91, webmockr/2.0.0 +- request: + method: post + uri: https://api.rsc.org/compounds/v1/tools/validate/inchikey + body: + encoding: '' + string: '{"inchikey":"BQJCRHHNABKAKU-KBQPJGBKSA-5"}' + headers: + Accept: application/json + Content-Type: application/json + apikey: <> + response: + status: + status_code: 400 + category: Client error + reason: Bad Request + message: 'Client error: (400) Bad Request' + headers: + content-type: application/problem+json; charset=utf-8 + content-length: '242' + date: Sat, 21 Dec 2024 21:31:10 GMT + x-amzn-requestid: fac3519a-b35f-4cc7-93ec-91c96a7bf535 + x-amz-apigw-id: DKUAyGVerPEEfDA= + x-amzn-trace-id: Root=1-6767339e-68245b8e72cbadd249058c1a;Parent=0e96806a9b8cc637;Sampled=0;Lineage=1:29310d16:0 + x-cache: Error from cloudfront + via: 1.1 6164863a8032b4b19e42aff8915017e2.cloudfront.net (CloudFront) + x-amz-cf-pop: BUD50-P1 + x-amz-cf-id: HCjrhtY_9PnEI1gz05vEDiGxCSs9C3l1q1-a7PbrsC4sDmLsLBXpsA== + body: + encoding: '' + file: no + string: '{"type":"https://tools.ietf.org/html/rfc9110#section-15.5.1","title":"One + or more validation errors occurred.","status":400,"errors":{"InchiKey":["Invalid + InChIKey format"]},"traceId":"00-87ee51520649bbb0a242091d6fd8f000-0f4830f37364a214-00"}' + recorded_at: 2024-12-21 21:31:14 GMT + recorded_with: vcr/1.6.0.91, webmockr/2.0.0 +- request: + method: post + uri: https://api.rsc.org/compounds/v1/tools/validate/inchikey + body: + encoding: '' + string: '{"inchikey":"BQJCRHHNABKAKU-KBQPJGBKSA-5"}' + headers: + Accept: application/json + Content-Type: application/json + apikey: <> + response: + status: + status_code: 400 + category: Client error + reason: Bad Request + message: 'Client error: (400) Bad Request' + headers: + content-type: application/problem+json; charset=utf-8 + content-length: '242' + date: Sat, 21 Dec 2024 21:31:11 GMT + x-amzn-requestid: 2a916bf0-4091-4d27-9a93-74782a4943e0 + x-amz-apigw-id: DKUBCGmCrPEEWxA= + x-amzn-trace-id: Root=1-6767339f-04595f9360d8142f0a3ccc9b;Parent=28114559baa9208f;Sampled=0;Lineage=1:29310d16:0 + x-cache: Error from cloudfront + via: 1.1 6164863a8032b4b19e42aff8915017e2.cloudfront.net (CloudFront) + x-amz-cf-pop: BUD50-P1 + x-amz-cf-id: ImZuFEWRPwlUvRvyMySXOZHurDa5lAe0qb4diEbv3glO60YDJEaiAw== + body: + encoding: '' + file: no + string: '{"type":"https://tools.ietf.org/html/rfc9110#section-15.5.1","title":"One + or more validation errors occurred.","status":400,"errors":{"InchiKey":["Invalid + InChIKey format"]},"traceId":"00-d7514dc309643de0c5595201e8e32c5e-c4608b6c90ff786b-00"}' + recorded_at: 2024-12-21 21:31:14 GMT + recorded_with: vcr/1.6.0.91, webmockr/2.0.0 +- request: + method: post + uri: https://api.rsc.org/compounds/v1/tools/validate/inchikey + body: + encoding: '' + string: '{"inchikey":"BQJCRHHNABKAKU-KBQPJGBKSA-5"}' + headers: + Accept: application/json + Content-Type: application/json + apikey: <> + response: + status: + status_code: 400 + category: Client error + reason: Bad Request + message: 'Client error: (400) Bad Request' + headers: + content-type: application/problem+json; charset=utf-8 + content-length: '242' + date: Sat, 21 Dec 2024 21:31:14 GMT + x-amzn-requestid: 9c7e8f29-31b3-4d1b-8c0c-1aa699ab4135 + x-amz-apigw-id: DKUBbGmRrPEEa9A= + x-amzn-trace-id: Root=1-676733a2-7cc1fbe16a4f1f3a3822fe84;Parent=05627547beb3e142;Sampled=0;Lineage=1:29310d16:0 + x-cache: Error from cloudfront + via: 1.1 6164863a8032b4b19e42aff8915017e2.cloudfront.net (CloudFront) + x-amz-cf-pop: BUD50-P1 + x-amz-cf-id: brMvkXtU1YwLibmvTlNGIFYY_HeCiYFnsK408AIPAr5qAkd_xEw5zA== + body: + encoding: '' + file: no + string: '{"type":"https://tools.ietf.org/html/rfc9110#section-15.5.1","title":"One + or more validation errors occurred.","status":400,"errors":{"InchiKey":["Invalid + InChIKey format"]},"traceId":"00-ebfc6672e561a103f3a78748287168a3-d64320f2c929e4e2-00"}' + recorded_at: 2024-12-21 21:31:14 GMT + recorded_with: vcr/1.6.0.91, webmockr/2.0.0 diff --git a/tests/fixtures/is_inchikey_cs_2.yml b/tests/fixtures/is_inchikey_cs_2.yml new file mode 100644 index 00000000..b4e88c3f --- /dev/null +++ b/tests/fixtures/is_inchikey_cs_2.yml @@ -0,0 +1,133 @@ +http_interactions: +- request: + method: post + uri: https://api.rsc.org/compounds/v1/filter/name + body: + encoding: '' + string: '{"name":"triclosan","orderBy":"recordId","orderDirection":"ascending"}' + headers: + Accept: application/json, text/xml, application/xml, */* + Content-Type: '' + apikey: <> + response: + status: + status_code: 200 + category: Success + reason: OK + message: 'Success: (200) OK' + headers: + content-type: application/json; charset=utf-8 + content-length: '50' + date: Sat, 21 Dec 2024 21:31:20 GMT + x-amzn-requestid: 4171dd05-f6d3-47cf-9844-3adde68028a7 + x-amz-apigw-id: DKUCYGV4LPEEfVQ= + x-amzn-trace-id: Root=1-676733a8-13c653d72b7a5f043a548451;Parent=041d6de2fef576a8;Sampled=0;Lineage=1:29310d16:0 + x-cache: Miss from cloudfront + via: 1.1 6164863a8032b4b19e42aff8915017e2.cloudfront.net (CloudFront) + x-amz-cf-pop: BUD50-P1 + x-amz-cf-id: 8sLJT26clMXUkzElTOnuLCqXMRvVP4pqGA_Jg8oG4A7kzsRD3yCjNA== + body: + encoding: '' + file: no + string: '{"queryId":"6273e02a-bc45-432d-b803-59b103c7a738"}' + recorded_at: 2024-12-21 21:31:21 GMT + recorded_with: vcr/1.6.0.91, webmockr/2.0.0 +- request: + method: post + uri: https://api.rsc.org/compounds/v1/tools/validate/inchikey + body: + encoding: '' + string: '{"inchikey":"BQJCRHHNABKAKU-KBQPJGBKSA-N"}' + headers: + Accept: application/json + Content-Type: application/json + apikey: <> + response: + status: + status_code: 200 + category: Success + reason: OK + message: 'Success: (200) OK' + headers: + content-type: application/json; charset=utf-8 + content-length: '14' + date: Sat, 21 Dec 2024 21:31:21 GMT + x-amzn-requestid: 4fd54ac0-6be5-426f-b956-0500535f73b7 + x-amz-apigw-id: DKUCeGc5LPEEfSA= + x-amzn-trace-id: Root=1-676733a9-53744b1f135212fc340ea85f;Parent=2191e1ac1081e71f;Sampled=0;Lineage=1:29310d16:0 + x-cache: Miss from cloudfront + via: 1.1 6164863a8032b4b19e42aff8915017e2.cloudfront.net (CloudFront) + x-amz-cf-pop: BUD50-P1 + x-amz-cf-id: 4gkNPhVjZwluvW4SB0m9wteE0_yw7WQZs5ZVqU4XKkzYtjTaHHDYew== + body: + encoding: '' + file: no + string: '{"valid":true}' + recorded_at: 2024-12-21 21:31:21 GMT + recorded_with: vcr/1.6.0.91, webmockr/2.0.0 +- request: + method: post + uri: https://api.rsc.org/compounds/v1/filter/name + body: + encoding: '' + string: '{"name":"triclosan","orderBy":"recordId","orderDirection":"ascending"}' + headers: + Accept: application/json, text/xml, application/xml, */* + Content-Type: '' + apikey: <> + response: + status: + status_code: 200 + category: Success + reason: OK + message: 'Success: (200) OK' + headers: + content-type: application/json; charset=utf-8 + content-length: '50' + date: Sat, 21 Dec 2024 21:31:21 GMT + x-amzn-requestid: d36146ac-a1b6-4aa2-823d-7e5831faf2be + x-amz-apigw-id: DKUCgFczLPEEkSQ= + x-amzn-trace-id: Root=1-676733a9-3861a5cd0974962033127ee4;Parent=30ac0a94a7bf97ca;Sampled=0;Lineage=1:29310d16:0 + x-cache: Miss from cloudfront + via: 1.1 6164863a8032b4b19e42aff8915017e2.cloudfront.net (CloudFront) + x-amz-cf-pop: BUD50-P1 + x-amz-cf-id: L_hGgmxFVqoOWCVG9Fj7vBQETt2t8-ZLYQNl4EAZQvrlFUBRImmIEQ== + body: + encoding: '' + file: no + string: '{"queryId":"52dd367e-00f5-49b5-947e-380acb1043ea"}' + recorded_at: 2024-12-21 21:31:21 GMT + recorded_with: vcr/1.6.0.91, webmockr/2.0.0 +- request: + method: post + uri: https://api.rsc.org/compounds/v1/tools/validate/inchikey + body: + encoding: '' + string: '{"inchikey":"BQJCRHHNABKAKU-KBQPJGBKSA"}' + headers: + Accept: application/json + Content-Type: application/json + apikey: <> + response: + status: + status_code: 200 + category: Success + reason: OK + message: 'Success: (200) OK' + headers: + content-type: application/json; charset=utf-8 + content-length: '15' + date: Sat, 21 Dec 2024 21:31:21 GMT + x-amzn-requestid: eb7e0e4d-30fa-4ed9-9517-c2622fa2e259 + x-amz-apigw-id: DKUCjHG0LPEEZyQ= + x-amzn-trace-id: Root=1-676733a9-50266038062bbc1d3a775139;Parent=5d8a8bd923de62ba;Sampled=0;Lineage=1:29310d16:0 + x-cache: Miss from cloudfront + via: 1.1 6164863a8032b4b19e42aff8915017e2.cloudfront.net (CloudFront) + x-amz-cf-pop: BUD50-P1 + x-amz-cf-id: dhqj6-WSQgUY4Hmsbcdh6Oozx3PkvZ0wsX2Vk8h0yXVq8lF3bGC8Yg== + body: + encoding: '' + file: no + string: '{"valid":false}' + recorded_at: 2024-12-21 21:31:21 GMT + recorded_with: vcr/1.6.0.91, webmockr/2.0.0 diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 2a486d82..c59dbbeb 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -13,7 +13,11 @@ test_that("examples in the article are unchanged", { capture_messages(is.cas("64-17-6", verbose = TRUE)), "64-17-6: Checksum is not correct! 5 vs. 6\n") skip_if_not(up, "ChemSpider service is down, skipping tests") - expect_false(is.inchikey("BQJCRHHNABKAKU-KBQPJGBKSA-5", type = "chemspider")) + + vcr::use_cassette("is_inchikey_cs_1", { + A <- is.inchikey("BQJCRHHNABKAKU-KBQPJGBKSA-5", type = "chemspider") + }) + expect_false(A) }) test_that("is.cas() returns correct results", { @@ -48,8 +52,10 @@ test_that("is.inchikey() returns correct results", { skip_on_ci() skip_if_not(up, "ChemSpider service is down, skipping tests") - g <- is.inchikey('BQJCRHHNABKAKU-KBQPJGBKSA-N', type = 'chemspider') - b <- is.inchikey('BQJCRHHNABKAKU-KBQPJGBKSA', type = 'chemspider') + vcr::use_cassette("is_inchikey_cs_2", { + g <- is.inchikey('BQJCRHHNABKAKU-KBQPJGBKSA-N', type = 'chemspider') + b <- is.inchikey('BQJCRHHNABKAKU-KBQPJGBKSA', type = 'chemspider') + }) expect_true(g) expect_false(b) From 422082c3e10260a74a2aeee7f8ad1800ab61c407 Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Sun, 22 Dec 2024 11:53:49 +0100 Subject: [PATCH 6/8] Document data sets in separate R scripts --- R/jagst.R | 20 ++++++++++++++++++++ R/lc50.R | 15 +++++++++++++++ R/webchem-package.R | 40 ---------------------------------------- man/jagst.Rd | 2 +- man/lc50.Rd | 2 +- 5 files changed, 37 insertions(+), 42 deletions(-) create mode 100644 R/jagst.R create mode 100644 R/lc50.R diff --git a/R/jagst.R b/R/jagst.R new file mode 100644 index 00000000..98fee0ed --- /dev/null +++ b/R/jagst.R @@ -0,0 +1,20 @@ +#' Organic plant protection products in the river Jagst / Germany in 2013 +#' +#' This dataset comprises environmental monitoring data of organic plant protection products +#' in the year 2013 in the river Jagst, Germany. +#' The data is publicly available and can be retrieved from the +#' LUBW Landesanstalt für Umwelt, Messungen und Naturschutz Baden-Württemberg. +#' It has been preprocessed and comprises measurements of 34 substances. +#' Substances without detects have been removed. +#' on 13 sampling occasions. +#' Values are given in ug/L. +#' +#' @format A data frame with 442 rows and 4 variables: +#' \describe{ +#' \item{date}{sampling data} +#' \item{substance}{substance names} +#' \item{value}{concentration in ug/L} +#' \item{qual}{qualifier, indicating values < LOQ} +#' } +#' @source \url{https://udo.lubw.baden-wuerttemberg.de/public/pages/home/index.xhtml} +"jagst" diff --git a/R/lc50.R b/R/lc50.R new file mode 100644 index 00000000..8489886f --- /dev/null +++ b/R/lc50.R @@ -0,0 +1,15 @@ +#' Acute toxicity data from U.S. EPA ECOTOX +#' +#' This dataset comprises acute ecotoxicity data of 124 insecticides. +#' The data is publicly available and can be retrieved from the EPA ECOTOX database +#' (\url{https://cfpub.epa.gov/ecotox/}) +#' It comprises acute toxicity data (D. magna, 48h, Laboratory, 48h) and has been +#' preprocessed (remove non-insecticides, aggregate multiple value, keep only numeric data etc). +#' +#' @format A data frame with 124 rows and 2 variables: +#' \describe{ +#' \item{cas}{CAS registry number} +#' \item{value}{LC50value} +#' } +#' @source \url{https://cfpub.epa.gov/ecotox/} +"lc50" diff --git a/R/webchem-package.R b/R/webchem-package.R index d55cbb99..cfc8b806 100644 --- a/R/webchem-package.R +++ b/R/webchem-package.R @@ -9,43 +9,3 @@ #' @importFrom utils globalVariables if (getRversion() >= "2.15.1") globalVariables(c(".")) - - - -#' Organic plant protection products in the river Jagst / Germany in 2013 -#' -#' This dataset comprises environmental monitoring data of organic plant protection products -#' in the year 2013 in the river Jagst, Germany. -#' The data is publicly available and can be retrieved from the -#' LUBW Landesanstalt für Umwelt, Messungen und Naturschutz Baden-Württemberg. -#' It has been preprocessed and comprises measurements of 34 substances. -#' Substances without detects have been removed. -#' on 13 sampling occasions. -#' Values are given in ug/L. -#' -#' @format A data frame with 442 rows and 4 variables: -#' \describe{ -#' \item{date}{sampling data} -#' \item{substance}{substance names} -#' \item{value}{concentration in ug/L} -#' \item{qual}{qualifier, indicating values < LOQ} -#' } -#' @source \url{https://udo.lubw.baden-wuerttemberg.de/public/pages/home/index.xhtml} -"jagst" - - -#' Acute toxicity data from U.S. EPA ECOTOX -#' -#' This dataset comprises acute ecotoxicity data of 124 insecticides. -#' The data is publicly available and can be retrieved from the EPA ECOTOX database -#' (\url{https://cfpub.epa.gov/ecotox/}) -#' It comprises acute toxicity data (D. magna, 48h, Laboratory, 48h) and has been -#' preprocessed (remove non-insecticides, aggregate multiple value, keep only numeric data etc). -#' -#' @format A data frame with 124 rows and 2 variables: -#' \describe{ -#' \item{cas}{CAS registry number} -#' \item{value}{LC50value} -#' } -#' @source \url{https://cfpub.epa.gov/ecotox/} -"lc50" diff --git a/man/jagst.Rd b/man/jagst.Rd index 7f38392c..486b6743 100644 --- a/man/jagst.Rd +++ b/man/jagst.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/webchem-package.R +% Please edit documentation in R/jagst.R \docType{data} \name{jagst} \alias{jagst} diff --git a/man/lc50.Rd b/man/lc50.Rd index 68a5b975..ff81b7ff 100644 --- a/man/lc50.Rd +++ b/man/lc50.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/webchem-package.R +% Please edit documentation in R/lc50.R \docType{data} \name{lc50} \alias{lc50} From 4cb98f3c842f0d5b97212e19da31d9d798fbefbb Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Sun, 22 Dec 2024 13:49:39 +0100 Subject: [PATCH 7/8] Update package documentation file --- R/webchem-package.R | 5 ++--- R/zzz.R | 1 + man/webchem-package.Rd | 42 ++++++++++++++++++++++++++++++++++++++++++ man/webchem.Rd | 10 ---------- 4 files changed, 45 insertions(+), 13 deletions(-) create mode 100644 R/zzz.R create mode 100644 man/webchem-package.Rd delete mode 100644 man/webchem.Rd diff --git a/R/webchem-package.R b/R/webchem-package.R index cfc8b806..f8844b3f 100644 --- a/R/webchem-package.R +++ b/R/webchem-package.R @@ -4,8 +4,7 @@ #' of web APIs for chemical information. #' #' @docType package -#' @name webchem #' @importFrom methods is #' @importFrom utils globalVariables -if (getRversion() >= "2.15.1") - globalVariables(c(".")) +"_PACKAGE" + diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 00000000..d1696162 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1 @@ +if (getRversion() >= "2.15.1") utils::globalVariables(c(".")) \ No newline at end of file diff --git a/man/webchem-package.Rd b/man/webchem-package.Rd new file mode 100644 index 00000000..d1c39607 --- /dev/null +++ b/man/webchem-package.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/webchem-package.R +\docType{package} +\name{webchem-package} +\alias{webchem} +\alias{webchem-package} +\title{webchem: An R package to retrieve chemical information from the web.} +\description{ +Chemical information from around the web. This package interacts with a suite +of web APIs for chemical information. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://docs.ropensci.org/webchem/} + \item \url{https://github.com/ropensci/webchem} + \item Report bugs at \url{https://github.com/ropensci/webchem/issues} +} + +} +\author{ +\strong{Maintainer}: Tamás Stirling \email{stirling.tamas@gmail.com} [contributor] + +Authors: +\itemize{ + \item Eduard Szöcs +} + +Other contributors: +\itemize{ + \item Robert Allaway [contributor] + \item Daniel Muench [contributor] + \item Johannes Ranke [contributor] + \item Andreas Scharmüller [contributor] + \item Eric R Scott [contributor] + \item Jan Stanstrup [contributor] + \item João Vitor F Cavalcante [contributor] + \item Gordon Getzinger [contributor] + \item Ethan Bass [contributor] +} + +} diff --git a/man/webchem.Rd b/man/webchem.Rd deleted file mode 100644 index 4ccbaa97..00000000 --- a/man/webchem.Rd +++ /dev/null @@ -1,10 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/webchem-package.R -\docType{package} -\name{webchem} -\alias{webchem} -\title{webchem: An R package to retrieve chemical information from the web.} -\description{ -Chemical information from around the web. This package interacts with a suite -of web APIs for chemical information. -} From 8449b52c2adfd6f73d89787107183d0b4be7c1e9 Mon Sep 17 00:00:00 2001 From: Tamas Stirling Date: Sun, 22 Dec 2024 20:48:50 +0100 Subject: [PATCH 8/8] Update NEWS --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 8ebb318b..889a6a7f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,8 @@ ## BUG FIXES * `pc_prop()` returned `NA` without much further explanation if any of the queries were not positive integers. The updated function attempts to coerce queries to positive integers, only progresses valid queries, and prints informative messages along the way if verbose messages are enabled. +* `srs_query()` broke because the URL was no longer working. We have updated the URL. +* `is.inchikey(type = "chemspider")` broke because the URL was no longer working. We have updated the URL but the function now requires an API key like all other ChemSpider functions. # webchem 1.3.0