diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4d591daf..2db2ed20 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -64,6 +64,8 @@ Some consistency guidelines: 13. SMILES strings may use special characters like "#". `URLencode()` does not encode this as "%23" by default, so use `URLencode(query, reserved = TRUE)` instead. It's important to note that it's the query that has to be encoded like this, not the full url. +14. Print verbose messages. Use `httr::message_for_status()` and `webchem_message()` functions to generate standard messages when possible. + ### Data Sources You might think all webscraping is perfectly legal but it is unfortunately not that simple. diff --git a/NAMESPACE b/NAMESPACE index 26df862d..45634058 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,7 +31,6 @@ S3method(smiles,pc_prop) S3method(smiles,wd_ident) export(as.cas) export(aw_query) -export(build_aw_idx) export(cas) export(chebi_comp_entity) export(ci_query) diff --git a/NEWS.md b/NEWS.md index 6c5e4cbd..1c0f5b74 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# webchem 1.0.0.9005 +# webchem 1.0.0.9010 ## NEW FEATURES @@ -8,6 +8,8 @@ ## MINOR IMPROVEMENTS +* Most functions now use httr::RETRY() to access webservices. +* Verbose messages are now harmonized. * The `"type"` argument in `ci_query()` and `aw_query()` has been changed to `"from"` for consistency with other functions * `fn_percept()` and `cts_compinfo()` now have `"query"` and `"from"` arguments for consistency with other functions * Possible values for `"from"` have been made more consistent across functions diff --git a/R/alanwood.R b/R/alanwood.R index aa943078..62906932 100644 --- a/R/alanwood.R +++ b/R/alanwood.R @@ -8,9 +8,7 @@ #' @param query character; search string #' @param from character; type of input ('cas' or 'name') #' @param verbose logical; print message during processing to console? -#' @param force_build logical; force building a new index? See -#' \code{\link{build_aw_idx}} for more details. -#' @param ... currently unused. +#' @param ... additional arguments to internal utility functions #' @param type deprecated #' @return A list of eight entries: common-name, status, preferred IUPAC Name, #' IUPAC Name, cas, formula, activity, subactivity, inchikey, inchi and source @@ -35,25 +33,29 @@ #' # use CAS-numbers #' aw_query("79622-59-6", from = 'cas') #' } -#' @seealso \code{\link{build_aw_idx}} - aw_query <- function(query, from = c("name", "cas"), verbose = TRUE, - force_build = FALSE, type, ...) { + type, ...) { + + if (ping_service("aw") == FALSE) stop(webchem_message("service_down")) + if (!missing(type)) { message('"type" is deprecated. Please use "from" instead. ') from <- type } if ("commonname" %in% from) { - warning('To search by compound name use "name" instead of "commonname"') + message('To search by compound name use "name" instead of "commonname"') from <- "name" } from <- match.arg(from) - aw_idx <- build_aw_idx(verbose, force_build) + aw_idx <- build_aw_idx(verbose, ...) foo <- function(query, from, verbose) { - on.exit(suppressWarnings(closeAllConnections())) - + if (is.na(query)) { + if (verbose) webchem_message("na") + return(NA) + } + if (verbose) webchem_message("query", query, appendLF = FALSE) # search links in indexes if (from == "name") { links <- aw_idx$links[aw_idx$source == "cn"] @@ -70,80 +72,92 @@ aw_query <- function(query, from = c("name", "cas"), verbose = TRUE, } takelink <- links[tolower(names) == tolower(query)] - if (is.na(query)) takelink <- vector() if (length(takelink) == 0) { - message("Not found! Returning NA.\n") + if (verbose) message("Not found.") return(NA) } if (length(takelink) > 1) { takelink <- unique(takelink) if (length(takelink) > 1) { - message("More then one link found! Returning first.\n") + message("More then one link found. Returning first.") takelink <- takelink[1] } } - if (verbose) - message("Querying ", takelink) + qurl <- paste0("http://www.alanwood.net/pesticides/", takelink) Sys.sleep(rgamma(1, shape = 15, scale = 1 / 10)) - ttt <- read_html(paste0("http://www.alanwood.net/pesticides/", takelink)) - - status <- xml_text( - xml_find_all(ttt, "//tr/th[@id='r1']/following-sibling::td")) - pref_iupac_name <- xml_text( - xml_find_all(ttt, "//tr/th[@id='r2']/following-sibling::td")) - iupac_name <- xml_text( - xml_find_all(ttt, "//tr/th[@id='r3']/following-sibling::td")) - cas <- xml_text( - xml_find_all(ttt, "//tr/th[@id='r5']/following-sibling::td")) - formula <- xml_text( - xml_find_all(ttt, "//tr/th[@id='r6']/following-sibling::td")) - activity <- xml_text( - xml_find_all(ttt, "//tr/th[@id='r7']/following-sibling::td")) - subactivity <- trimws( - strsplit(gsub("^.*\\((.*)\\)", "\\1", activity), ";")[[1]]) - activity <- gsub("^(.*) \\(.*\\)", "\\1", activity) - inchikey_r <- xml_text( - xml_find_all(ttt, "//tr/th[@id='r11']/following-sibling::td")) - if (length(inchikey_r) == 0) { - inchikey <- NA - } else { - if (grepl("isomer", inchikey_r)) { - inchikey <- c( - s_isomer = gsub( - ".*\\(S\\)-isomer:(.*)(minor component.*)", "\\1", inchikey_r), - r_isomer = gsub(".*\\(R\\)-isomer:(.*)", "\\1", inchikey_r)) - } - if (grepl("identifier", inchikey_r)) { - inchikey <- c(gsub("(.*)identifier.*", "\\1", inchikey_r), - gsub(".*identifier.*:(.*)", "\\1", inchikey_r)) - names(inchikey) <- c("inchikey", - gsub(".*(identifier.*:).*", "\\1", inchikey_r) - ) - } - if (!grepl("isomer", inchikey_r) & !grepl("identifier", inchikey_r)) - inchikey <- inchikey_r + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(NA) } + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200){ + ttt <- read_html(res) + status <- xml_text( + xml_find_all(ttt, "//tr/th[@id='r1']/following-sibling::td")) + pref_iupac_name <- xml_text( + xml_find_all(ttt, "//tr/th[@id='r2']/following-sibling::td")) + iupac_name <- xml_text( + xml_find_all(ttt, "//tr/th[@id='r3']/following-sibling::td")) + cas <- xml_text( + xml_find_all(ttt, "//tr/th[@id='r5']/following-sibling::td")) + formula <- xml_text( + xml_find_all(ttt, "//tr/th[@id='r6']/following-sibling::td")) + activity <- xml_text( + xml_find_all(ttt, "//tr/th[@id='r7']/following-sibling::td")) + subactivity <- trimws( + strsplit(gsub("^.*\\((.*)\\)", "\\1", activity), ";")[[1]]) + activity <- gsub("^(.*) \\(.*\\)", "\\1", activity) + inchikey_r <- xml_text( + xml_find_all(ttt, "//tr/th[@id='r11']/following-sibling::td")) + if (length(inchikey_r) == 0) { + inchikey <- NA + } else { + if (grepl("isomer", inchikey_r)) { + inchikey <- c( + s_isomer = gsub( + ".*\\(S\\)-isomer:(.*)(minor component.*)", "\\1", inchikey_r), + r_isomer = gsub(".*\\(R\\)-isomer:(.*)", "\\1", inchikey_r)) + } + if (grepl("identifier", inchikey_r)) { + inchikey <- c(gsub("(.*)identifier.*", "\\1", inchikey_r), + gsub(".*identifier.*:(.*)", "\\1", inchikey_r)) + names(inchikey) <- c("inchikey", + gsub(".*(identifier.*:).*", "\\1", inchikey_r) + ) + } + if (!grepl("isomer", inchikey_r) & !grepl("identifier", inchikey_r)) + inchikey <- inchikey_r + } - inchi <- xml_text( - xml_find_all(ttt, "//tr/th[@id='r12']/following-sibling::td")) - if (length(inchi) == 0) { - inchi <- NA - } else { - if (grepl("isomer", inchi)) { - inchi <- c(s_isomer = gsub(".*\\(S\\)-isomer:(.*)(minor component.*)", - "\\1", inchi), - r_isomer = gsub(".*\\(R\\)-isomer:(.*)", "\\1", inchi)) + inchi <- xml_text( + xml_find_all(ttt, "//tr/th[@id='r12']/following-sibling::td")) + if (length(inchi) == 0) { + inchi <- NA + } else { + if (grepl("isomer", inchi)) { + inchi <- c(s_isomer = gsub(".*\\(S\\)-isomer:(.*)(minor component.*)", + "\\1", inchi), + r_isomer = gsub(".*\\(R\\)-isomer:(.*)", "\\1", inchi)) + } } + # add source url + source_url <- paste0("http://www.alanwood.net/pesticides/", takelink) + out <- list(cname = cname, status = status, + pref_iupac_name = pref_iupac_name, iupac_name = iupac_name, + cas = cas, formula = formula, activity = activity, + subactivity = subactivity, inchikey = inchikey, inchi = inchi, + source_url = source_url) + return(out) + } + else { + return(NA) } - # add source url - source_url <- paste0("http://www.alanwood.net/pesticides/", takelink) - out <- list(cname = cname, status = status, - pref_iupac_name = pref_iupac_name, iupac_name = iupac_name, - cas = cas, formula = formula, activity = activity, - subactivity = subactivity, inchikey = inchikey, inchi = inchi, - source_url = source_url) - return(out) } out <- lapply(query, function(x) foo(x, from = from, verbose = verbose)) out <- setNames(out, query) @@ -155,19 +169,18 @@ aw_query <- function(query, from = c("name", "cas"), verbose = TRUE, #' #' This function builds an index of Alan Woods Compendium of Pesticides #' \url{http://www.alanwood.net/pesticides} and saves it to -#' \code{\link{tempdir}}. This is a utility function for -#' \code{\link{aw_query}} and will not be exported in future releases. +#' \code{\link{tempdir}}. #' @import xml2 +#' #' @param verbose logical; print message during processing to console? #' @param force_build logical; force building a new index? #' @return a data.frame #' @seealso \code{\link{aw_query}}, \code{\link{tempdir}} #' @author Eduard Szöcs, \email{eduardszoecs@@gmail.com} #' @source \url{http://www.alanwood.net/pesticides} -#' @export +#' @noRd build_aw_idx <- function(verbose = TRUE, force_build = FALSE) { - on.exit(suppressWarnings(closeAllConnections())) - message(msg = "build_aw_idx() will not be exported in future releases.") + if (ping_service("aw") == FALSE) stop(webchem_message("service_down")) suppressWarnings(try(load(paste0(tempdir(), "/data/aw_idx.rda")), silent = TRUE)) if (!file.exists(paste0(tempdir(), "/data/aw_idx.rda")) | @@ -176,40 +189,48 @@ build_aw_idx <- function(verbose = TRUE, force_build = FALSE) { if (!dir.exists(paste0(tempdir(), "/data"))) { dir.create(paste0(tempdir(), "/data")) } - if (verbose == TRUE) { - message("Building index.", appendLF = FALSE) - } - idx1 <- read_html("http://www.alanwood.net/pesticides/index_rn.html") - prep_idx <- function(y) { - names <- xml_text(xml_find_all(y, "//dl/dt")) - links <- xml_attr( - xml_find_all(y, "//dt/following-sibling::dd[1]/a[1]"), "href") - linknames <- xml_text(xml_find_all(y, "//dt/following-sibling::dd[1]/a[1]")) - return(data.frame(names, links, linknames, stringsAsFactors = FALSE)) + if (verbose) message("Building index. ", appendLF = FALSE) + res <- try(httr::RETRY("GET", + "http://www.alanwood.net/pesticides/index_rn.html", + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent= TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(NA) } - aw_idx <- rbind(prep_idx(idx1)) - aw_idx[["source"]] <- "rn" - idx4 <- read_html("http://www.alanwood.net/pesticides/index_cn.html") - n <- xml_find_all(idx4, "//a") - names <- xml_text(n) - rm <- names == "" - names <- names[!rm] - links <- xml_attr(n, "href") - links <- links[!rm] - idx4 <- data.frame(names = NA, links = links, linknames = names, - source = "cn", stringsAsFactors = FALSE) - aw_idx <- rbind(aw_idx, idx4) + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200){ + idx1 <- read_html("http://www.alanwood.net/pesticides/index_rn.html") + prep_idx <- function(y) { + names <- xml_text(xml_find_all(y, "//dl/dt")) + links <- xml_attr( + xml_find_all(y, "//dt/following-sibling::dd[1]/a[1]"), "href") + linknames <- xml_text( + xml_find_all(y, "//dt/following-sibling::dd[1]/a[1]")) + return(data.frame(names, links, linknames, stringsAsFactors = FALSE)) + } + aw_idx <- rbind(prep_idx(idx1)) + aw_idx[["source"]] <- "rn" + idx4 <- read_html("http://www.alanwood.net/pesticides/index_cn.html") + n <- xml_find_all(idx4, "//a") + names <- xml_text(n) + rm <- names == "" + names <- names[!rm] + links <- xml_attr(n, "href") + links <- links[!rm] + idx4 <- data.frame(names = NA, links = links, linknames = names, + source = "cn", stringsAsFactors = FALSE) + aw_idx <- rbind(aw_idx, idx4) - # fix encoding - ln <- aw_idx$linknames - Encoding(ln) <- "latin1" - ln <- iconv(ln, from = "latin1", to = "ASCII", sub = "") - aw_idx$linknames <- ln - attr(aw_idx, "date") <- Sys.Date() - if (verbose == TRUE) { - message(" Done.") + # fix encoding + ln <- aw_idx$linknames + Encoding(ln) <- "latin1" + ln <- iconv(ln, from = "latin1", to = "ASCII", sub = "") + aw_idx$linknames <- ln + attr(aw_idx, "date") <- Sys.Date() + save(aw_idx, file = paste0(tempdir(), "/data/aw_idx.rda")) } - save(aw_idx, file = paste0(tempdir(), "/data/aw_idx.rda")) } return(aw_idx) } diff --git a/R/chebi.R b/R/chebi.R index bca3f3ac..585cd7dc 100644 --- a/R/chebi.R +++ b/R/chebi.R @@ -78,6 +78,9 @@ get_chebiid <- function(query, stars = c('all', 'two only', 'three only'), verbose = TRUE, ...) { + + if (ping_service("chebi") == FALSE) stop(webchem_message("service_down")) + match <- match.arg(match) from <- toupper(match.arg(from)) if (from == "NAME") { @@ -90,9 +93,10 @@ get_chebiid <- function(query, stars <- toupper(match.arg(stars)) foo <- function(query, from, match, max_res, stars, verbose, ...) { - if (is.na(query)) return(tibble("query" = NA_character_, - "chebiid" = NA_character_)) - + if (is.na(query)) { + if (verbose) webchem_message("na") + return(tibble::tibble("query" = NA_character_, "chebiid" = NA_character_)) + } # query url <- 'http://www.ebi.ac.uk:80/webservices/chebi/2.0/webservice' headers <- c(Accept = 'text/xml', @@ -114,20 +118,26 @@ get_chebiid <- function(query, ') Sys.sleep(rgamma(1, shape = 5, scale = 1/10)) - if (verbose) - message(query, ': ', url) - res <- POST(url, - add_headers(headers), - body = body) + if (verbose) webchem_message("query", query, appendLF = FALSE) + res <- try(httr::RETRY("POST", + url, + httr::user_agent(webchem_url()), + httr::add_headers(headers), + body = body, + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(tibble::tibble("query" = query, "chebiid" = NA_character_)) + } + if (verbose) message(httr::message_for_status(res)) if (res$status_code == 200) { - cont <- try(content(res, type = 'text/xml', encoding = 'utf-8'), - silent = TRUE) + cont <- content(res, type = 'text/xml', encoding = 'utf-8') out <- l2df(as_list(xml_children(xml_find_first(cont, '//d1:return')))) out <- as_tibble(setNames(out, tolower(names(out)))) if (nrow(out) == 0) { - message('No result found. \n') - return(tibble("query" = query, - "chebiid" = NA_character_)) + webchem_message("not_found") + return(tibble::tibble("query" = query, "chebiid" = NA_character_)) } if (nrow(out) > 0) out$query <- query if (nrow(out) == 1) return(out) @@ -152,18 +162,14 @@ get_chebiid <- function(query, return(out[out$chebiid == matched, ]) } if (match == "na") { - return(tibble("query" = query, - "chebiid" = NA_character_)) - } + return(tibble::tibble("query" = query, "chebiid" = NA_character_)) + } if (match == "first") { return(out[1, ]) } } else { - out <- tibble("query" = query, - "chebiid" = NA_character_) - message('Returning NA (', http_status(res)$message, '). \n') - return(out) - } + return(tibble::tibble("query" = query, "chebiid" = NA_character_)) + } } out <- lapply(query, foo, @@ -237,9 +243,14 @@ get_chebiid <- function(query, #' } chebi_comp_entity <- function(chebiid, verbose = TRUE, ...) { + if (ping_service("chebi") == FALSE) stop(webchem_message("service_down")) + foo <- function(chebiid, verbose, ...) { # chebiid = c('CHEBI:27744', 'CHEBI:17790'); verbose = TRUE # debuging - if (is.na(chebiid)) return(NA) + if (is.na(chebiid)) { + if (verbose) webchem_message("na") + return(NA) + } url <- 'http://www.ebi.ac.uk:80/webservices/chebi/2.0/webservice' headers <- c(Accept = 'text/xml', Accept = 'multipart/*', @@ -256,14 +267,21 @@ chebi_comp_entity <- function(chebiid, verbose = TRUE, ...) { ') - if (verbose) - message(chebiid, ': ', url) + if (verbose) webchem_message("query", chebiid, appendLF = FALSE) Sys.sleep(rgamma(1, shape = 5, scale = 1/10)) - res <- POST(url, - add_headers(headers), - body = body) + res <- try(httr::RETRY("POST", + url, + httr::user_agent(webchem_url()), + httr::add_headers(headers), + body = body, + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(NA) + } + if (verbose) message(httr::message_for_status(res)) if (res$status_code != 200) { - warning(http_status(res)$message) return(NA) } else { cont <- content(res, type = 'text/xml', encoding = 'utf-8') diff --git a/R/chemid.R b/R/chemid.R index b2e26d1a..75cc4c31 100644 --- a/R/chemid.R +++ b/R/chemid.R @@ -55,19 +55,22 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), match = c('best', 'first', 'ask', 'na'), verbose = TRUE, type){ + + if (ping_service("ci") == FALSE) stop(webchem_message("service_down")) + if(!missing(type)) { - warning('"type" is deprecated. Please use "from" instead. ') + message('"type" is deprecated. Please use "from" instead. ') from <- type } from <- match.arg(from) match <- match.arg(match) foo <- function(query, from, match, verbose){ - on.exit(suppressWarnings(closeAllConnections())) if (is.na(query)) { - message('query is NA! Returning NA.\n') + if (verbose) webchem_message("na") return(NA) } + if (verbose) webchem_message("query", query, appendLF = FALSE) query <- URLencode(query, reserved = TRUE) baseurl <- switch( from, @@ -76,156 +79,185 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), inchikey = "https://chem.nlm.nih.gov/chemidplus/inchikey/startswith/") # return max 50 hits qurl <- paste0(baseurl, query, '?DT_START_ROW=0&DT_ROWS_PER_PAGE=50') - if (verbose) - message(qurl) Sys.sleep( rgamma(1, shape = 15, scale = 1/10)) - ttt <- try(read_html(qurl), silent = TRUE) - if (inherits(ttt, 'try-error')) { - message('Not found! Returning NA.\n') - return(NA) - } - - tit <- xml_text(xml_find_all(ttt, "//head/title")) - no <- xml_text(xml_find_all(ttt, "//h3")) - if (length(no) != 0 && 'The following query produced no records:' %in% no) { - message('Not found! Returning NA.\n') + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") return(NA) } - - # handle multiple inputs - if (grepl('^ChemIDplus Results - Chemical information', x = tit)) { - if (verbose) - message("More then one Link found. \n") - hit_names <- xml_text(xml_find_all(ttt, "//a[@title='Open record details']")) - hit_cas <- xml_text(xml_find_all(ttt, "//a[@title='Open record details']/following-sibling::text()[1]")) - # exclude missing cas - trm <- nchar(hit_cas) < 5 - hit_cas <- hit_cas[!trm] - hit_names <- hit_names[!trm] - - if (match == 'first') { - if (verbose) - message("Returning first match. \n") - hit_cas <- hit_cas[1] - matched_sub <- hit_names[1] - d <- 'first' + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200){ + ttt <- read_html(res) + tit <- xml_text(xml_find_all(ttt, "//head/title")) + no <- xml_text(xml_find_all(ttt, "//h3")) + if (length(no) != 0 && 'The following query produced no records:' %in% no) { + webchem_message("not_found") + return(NA) } - if (match == 'best') { + # handle multiple inputs + if (grepl('^ChemIDplus Results - Chemical information', x = tit)) { if (verbose) - message("Returning best match. \n") - hit_names <- gsub(' \\[.*\\]', '', hit_names) - dd <- adist(URLdecode(query), hit_names) / nchar(hit_names) - hit_cas <- hit_cas[which.min(dd)] - matched_sub <- hit_names[which.min(dd)] - d <- dd[which.min(dd)] - } + message(" More then one Link found. ", appendLF = FALSE) + hit_names <- xml_text( + xml_find_all(ttt, "//a[@title='Open record details']")) + hit_cas <- xml_text( + xml_find_all( + ttt, + "//a[@title='Open record details']/following-sibling::text()[1]")) + # exclude missing cas + trm <- nchar(hit_cas) < 5 + hit_cas <- hit_cas[!trm] + hit_names <- hit_names[!trm] - if (match == 'na') { - if (verbose) - message("Returning NA. \n") - return(NA) - } + if (match == 'first') { + if (verbose) + message("Returning first match.") + hit_cas <- hit_cas[1] + matched_sub <- hit_names[1] + d <- 'first' + } + + if (match == 'best') { + if (verbose) + message("Returning best match.") + hit_names <- gsub(' \\[.*\\]', '', hit_names) + dd <- adist(URLdecode(query), hit_names) / nchar(hit_names) + hit_cas <- hit_cas[which.min(dd)] + matched_sub <- hit_names[which.min(dd)] + d <- dd[which.min(dd)] + } + + if (match == 'na') { + if (verbose) + message("Returning NA.") + return(NA) + } + + if (match == 'ask') { + tochoose <- data.frame(name = hit_names, cas = hit_cas) + print(tochoose) + message("Enter rownumber of compounds (other inputs will return 'NA'): ") + take <- as.numeric(scan(n = 1, quiet = TRUE)) + if (length(take) == 0) { + return(NA) + } + if (take %in% seq_len(nrow(tochoose))) { + hit_cas <- hit_cas[take] + matched_sub <- hit_names[take] + d <- 'interactive' + } + } + + # check hit + if (is.na(hit_cas)) { + if (verbose) + message('CAS not found! Returning NA.') + return(NA) + } - if (match == 'ask') { - tochoose <- data.frame(name = hit_names, cas = hit_cas) - print(tochoose) - message("\nEnter rownumber of compounds (other inputs will return 'NA'):\n") # prompt - take <- as.numeric(scan(n = 1, quiet = TRUE)) - if (length(take) == 0) { + # retry with CAS-API + qurl <- paste0('https://chem.nlm.nih.gov/chemidplus/rn/', hit_cas) + if (verbose) webchem_message("query", hit_cas, appendLF = FALSE) + Sys.sleep( rgamma(1, shape = 15, scale = 1/10)) + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") return(NA) } - if (take %in% seq_len(nrow(tochoose))) { - hit_cas <- hit_cas[take] - matched_sub <- hit_names[take] - d <- 'interactive' + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200) { + ttt <- read_html(res) + source_url <- qurl } + + } else { + d <- 'direct match' + matched_sub <- xml_text( + xml_find_all( + ttt, + "//h3[contains(., 'Name of Substance')]/following-sibling::div[1]//li"))[1] + source_url <- gsub('^(.*)\\?.*', '\\1', qurl) } - # check hit - if (is.na(hit_cas)) { - if (verbose) - message('CAS not found! Returning NA.\n') - return(NA) + if(is.na(xml_find_first(ttt, "//h3[contains(., 'Name of Substance')]/following-sibling::div[1]//li"))){ + name <- NA + }else{ + name <- xml_text(xml_find_all(ttt, "//h3[contains(., 'Name of Substance')]/following-sibling::div[1]//li")) } - # retry with CAS-API - qurl <- paste0('https://chem.nlm.nih.gov/chemidplus/rn/', hit_cas) - if (verbose) - message(qurl) - Sys.sleep( rgamma(1, shape = 15, scale = 1/10)) - ttt <- try(read_html(qurl), silent = TRUE) - source_url <- qurl - } else { - d <- 'direct match' - matched_sub <- xml_text(xml_find_all(ttt, "//h3[contains(., 'Name of Substance')]/following-sibling::div[1]//li"))[1] - source_url <- gsub('^(.*)\\?.*', '\\1', qurl) - } + if(is.na(xml_find_first(ttt, "//h3[contains(., 'Synonyms')]/following-sibling::div[1]//li"))){ + synonyms <- NA + }else{ + synonyms <- xml_text(xml_find_all(ttt, "//h3[contains(., 'Synonyms')]/following-sibling::div[1]//li")) + } - if(is.na(xml_find_first(ttt, "//h3[contains(., 'Name of Substance')]/following-sibling::div[1]//li"))){ - name <- NA - }else{ - name <- xml_text(xml_find_all(ttt, "//h3[contains(., 'Name of Substance')]/following-sibling::div[1]//li")) - } + if(is.na(xml_find_first(ttt, "//h3[contains(., 'CAS Registry')]/following-sibling::ul[1]//li"))){ + cas <- NA + } else { + cas <- xml_text(xml_find_all(ttt, "//h3[contains(., 'CAS Registry')]/following-sibling::ul[1]//li")) + } - if(is.na(xml_find_first(ttt, "//h3[contains(., 'Synonyms')]/following-sibling::div[1]//li"))){ - synonyms <- NA - }else{ - synonyms <- xml_text(xml_find_all(ttt, "//h3[contains(., 'Synonyms')]/following-sibling::div[1]//li")) - } + if(is.na(xml_find_first(ttt, "//h3[contains(., 'InChI')]/following-sibling::text()[1]"))){ + inchi <- NA + } else { + inchi <- gsub('\\n|\\t', '', + xml_text(xml_find_all(ttt, "//h3[contains(., 'InChI')]/following-sibling::text()[1]"))[1] + ) + } - if(is.na(xml_find_first(ttt, "//h3[contains(., 'CAS Registry')]/following-sibling::ul[1]//li"))){ - cas <- NA - } else { - cas <- xml_text(xml_find_all(ttt, "//h3[contains(., 'CAS Registry')]/following-sibling::ul[1]//li")) - } + if(is.na(xml_find_first(ttt, "//h3[contains(., 'InChIKey')]/following-sibling::text()[1]"))){ + inchikey <- NA + } else { + inchikey <- gsub('\\n|\\t|\\r', '', + xml_text(xml_find_all(ttt, "//h3[contains(., 'InChIKey')]/following-sibling::text()[1]")) + ) + } - if(is.na(xml_find_first(ttt, "//h3[contains(., 'InChI')]/following-sibling::text()[1]"))){ - inchi <- NA - } else { - inchi <- gsub('\\n|\\t', '', - xml_text(xml_find_all(ttt, "//h3[contains(., 'InChI')]/following-sibling::text()[1]"))[1] - ) - } + if(is.na(xml_find_first(ttt, "//h3[contains(., 'Smiles')]/following-sibling::text()[1]"))){ + smiles <- NA + } else { + smiles <- gsub('\\n|\\t|\\r', '', + xml_text(xml_find_all(ttt, "//h3[contains(., 'Smiles')]/following-sibling::text()[1]")) + ) + } - if(is.na(xml_find_first(ttt, "//h3[contains(., 'InChIKey')]/following-sibling::text()[1]"))){ - inchikey <- NA - } else { - inchikey <- gsub('\\n|\\t|\\r', '', - xml_text(xml_find_all(ttt, "//h3[contains(., 'InChIKey')]/following-sibling::text()[1]")) - ) - } + if(is.na(xml_find_first(ttt, "//h2[contains(., 'Toxicity')]/following-sibling::div//table"))){ + toxicity <- NA + } else { + toxicity <- html_table(xml_find_all(ttt, "//h2[contains(., 'Toxicity')]/following-sibling::div//table"))[[1]] + } - if(is.na(xml_find_first(ttt, "//h3[contains(., 'Smiles')]/following-sibling::text()[1]"))){ - smiles <- NA - } else { - smiles <- gsub('\\n|\\t|\\r', '', - xml_text(xml_find_all(ttt, "//h3[contains(., 'Smiles')]/following-sibling::text()[1]")) - ) - } + if(is.na(xml_find_first(ttt, "//h2[contains(., 'Physical Prop')]/following-sibling::div//table"))){ + physprop <- NA + } else { + physprop <- html_table(xml_find_all(ttt, "//h2[contains(., 'Physical Prop')]/following-sibling::div//table"))[[1]] + physprop[ , 'Value'] <- as.numeric(physprop[ , 'Value']) + #= same as physprop + } - if(is.na(xml_find_first(ttt, "//h2[contains(., 'Toxicity')]/following-sibling::div//table"))){ - toxicity <- NA - } else { - toxicity <- html_table(xml_find_all(ttt, "//h2[contains(., 'Toxicity')]/following-sibling::div//table"))[[1]] - } - if(is.na(xml_find_first(ttt, "//h2[contains(., 'Physical Prop')]/following-sibling::div//table"))){ - physprop <- NA - } else { - physprop <- html_table(xml_find_all(ttt, "//h2[contains(., 'Physical Prop')]/following-sibling::div//table"))[[1]] - physprop[ , 'Value'] <- as.numeric(physprop[ , 'Value']) - #= same as physprop - } + out <- list(name = name, synonyms = synonyms, cas = cas, inchi = inchi, + inchikey = inchikey, smiles = smiles, toxicity = toxicity, + physprop = physprop, source_url = source_url) + attr(out, "matched") <- matched_sub + attr(out, "distance") <- d + class(out) <- 'chemid' + return(out) - out <- list(name = name, synonyms = synonyms, cas = cas, inchi = inchi, - inchikey = inchikey, smiles = smiles, toxicity = toxicity, - physprop = physprop, source_url = source_url) - attr(out, "matched") <- matched_sub - attr(out, "distance") <- d - class(out) <- 'chemid' - return(out) + } + else { + return(NA) + } } out <- lapply(query, foo, from = from, match = match, verbose = verbose) out <- setNames(out, query) diff --git a/R/cir.R b/R/cir.R index 0cda4e47..fb26d368 100644 --- a/R/cir.R +++ b/R/cir.R @@ -119,38 +119,44 @@ cir_query <- function(identifier, representation = "smiles", verbose = TRUE, choices = NULL, ...){ + + if (ping_service("cir") == FALSE) stop(webchem_message("service_down")) + if (!missing("choices")) { stop("`choices` is deprecated. Use `match` instead.") } match <- match.arg(match) foo <- function(identifier, representation, resolver, first, verbose) { if (is.na(identifier)) { + if (verbose) webchem_message("na") return(NA) - } else { - identifier <- URLencode(identifier, reserved = TRUE) - baseurl <- "https://cactus.nci.nih.gov/chemical/structure" - qurl <- paste(baseurl, identifier, representation, 'xml', sep = '/') - - if (!is.null(resolver)) { - qurl <- paste0(qurl, '?resolver=', resolver) - } - if (verbose) - message(qurl) - Sys.sleep(1.5) - h <- try(RETRY("GET", qurl)) - if (inherits(h, "try-error")) { - warning('Problem with web service encountered... Returning NA.') - return(NA) - } else { - tt <- read_xml(content(h, as = 'raw')) - out <- xml_text(xml_find_all(tt, '//item')) - } + } + if (verbose) webchem_message("query", identifier, appendLF = FALSE) + identifier <- URLencode(identifier, reserved = TRUE) + baseurl <- "https://cactus.nci.nih.gov/chemical/structure" + qurl <- paste(baseurl, identifier, representation, 'xml', sep = '/') + if (!is.null(resolver)) { + qurl <- paste0(qurl, '?resolver=', resolver) + } + Sys.sleep(1.5) + h <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(h, "try-error")) { + if (verbose) webchem_message("service_down") + return(NA) + } + if (verbose) message(httr::message_for_status(h)) + if (h$status_code == 200){ + tt <- read_xml(content(h, as = 'raw')) + out <- xml_text(xml_find_all(tt, '//item')) if (length(out) == 0) { - message('No representation found... Returning NA.') + if (verbose) webchem_message("not_found") return(NA) } out <- matcher(out, query = identifier, match = match, verbose = verbose) - # convert to numeric if (representation %in% c('mw', 'monoisotopic_mass', 'h_bond_donor_count', 'h_bond_acceptor_count', 'h_bond_center_count', 'rule_of_5_violation_count', 'rotor_count', @@ -161,6 +167,9 @@ cir_query <- function(identifier, representation = "smiles", out <- as.numeric(out) return(out) } + else { + return(NA) + } } out <- lapply(identifier, foo, representation = representation, resolver = resolver, first = first, verbose = verbose) @@ -278,6 +287,9 @@ cir_img <- function(query, frame = NULL, verbose = TRUE, ...) { + + if (ping_service("cir") == FALSE) stop(webchem_message("service_down")) + if (is.na(dir) || !dir.exists(dir)) { stop('Directory does not exist.') } @@ -364,21 +376,22 @@ cir_img <- function(query, path <- file.path(dir, paste0(query, ".", sub("format=", "", format))) # query Sys.sleep(1) - if (verbose) - message(paste0("Querying ", query, ". "), appendLF = FALSE) - res <- httr::RETRY("GET", - qurl, - quiet = TRUE, - terminate_on = 404, - httr::write_disk(path, overwrite = TRUE)) - if (verbose) { - message(httr::message_for_status(res), " ", appendLF = FALSE) - if (httr::http_error(res) && file.exists(path)) { - file.remove(path) - message("No image saved.") - } else { - message("Image saved under: ", path) - } + if (verbose) webchem_message("query", query, appendLF = FALSE) + res <- try(httr::RETRY("GET", + qurl, + quiet = TRUE, + terminate_on = 404, + httr::write_disk(path, overwrite = TRUE), + httr::user_agent(webchem_url())), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(NA) + } + if (verbose) message(httr::message_for_status(res)) + if (httr::http_error(res) && file.exists(path)) { + file.remove(path) + } else { + if (verbose) message(" Image saved under: ", path) } } for (i in query) { diff --git a/R/cts.R b/R/cts.R index f3ed22e9..1975f53e 100644 --- a/R/cts.R +++ b/R/cts.R @@ -32,26 +32,44 @@ #' sapply(out2, function(y) y$molweight) #' } cts_compinfo <- function(query, from = "inchikey", verbose = TRUE, inchikey){ + + if (ping_service("cts") == FALSE) stop(webchem_message("service_down")) + if (!missing(inchikey)) { - warning('"inchikey" is deprecated. Please use "query" instead.') + message('"inchikey" is deprecated. Please use "query" instead.') query <- inchikey } - match.arg(from) + from <- match.arg(from) foo <- function(query, verbose) { - if (!is.inchikey(query)) { - stop('Input is not a valid inchikey!') + if (is.na(query)) { + if (verbose) webchem_message("na") + return(NA) + } + if(verbose) webchem_message("query", query, appendLF = FALSE) + if (!is.inchikey(query, verbose = FALSE)) { + if (verbose) message("Input is not a valid inchikey.") + return(NA) } baseurl <- "http://cts.fiehnlab.ucdavis.edu/service/compound" qurl <- paste0(baseurl, '/', query) - if (verbose) - message(qurl) - Sys.sleep( rgamma(1, shape = 15, scale = 1/10)) - out <- try(fromJSON(qurl), silent = TRUE) + Sys.sleep(stats::rgamma(1, shape = 15, scale = 1/10)) + out <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) if (inherits(out, "try-error")) { - warning('Not found... Returning NA.') + if (verbose) webchem_message("service_down") + return(NA) + } + if (verbose) message(httr::message_for_status(out)) + if (out$status_code == 200) { + out <- jsonlite::fromJSON(rawToChar(out$content)) + return(out) + } + else { return(NA) } - return(out) } out <- lapply(query, foo, verbose = verbose) out <- setNames(out, query) @@ -105,6 +123,9 @@ cts_convert <- function(query, verbose = TRUE, choices = NULL, ...){ + + if (ping_service("cts") == FALSE) stop(webchem_message("service_down")) + if(!missing("choices")) { if (is.null(choices)) { message('"choices" is deprecated. Using match = "all" instead.') @@ -137,31 +158,43 @@ cts_convert <- function(query, } foo <- function(query, from, to, first, verbose){ - if (is.na(query)) return(NA) + if (is.na(query)) { + if (verbose) webchem_message("na") + return(NA) + } + if(verbose) webchem_message("query", query, appendLF = FALSE) query <- URLencode(query, reserved = TRUE) from <- URLencode(from, reserved = TRUE) to <- URLencode(to, reserved = TRUE) baseurl <- "http://cts.fiehnlab.ucdavis.edu/service/convert" qurl <- paste0(baseurl, '/', from, '/', to, '/', query) - if (verbose) - message(qurl) - Sys.sleep( rgamma(1, shape = 15, scale = 1/10)) - out <- try(fromJSON(qurl), silent = TRUE) - if (inherits(out, "try-error")) { - warning('Not found... Returning NA.') + Sys.sleep(stats::rgamma(1, shape = 15, scale = 1/10)) + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") return(NA) } - if (length(out$result[[1]]) == 0) { - message("Not found. Returning NA.") + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200) { + out <- jsonlite::fromJSON(rawToChar(res$content)) + if (length(out$result[[1]]) == 0) { + if (verbose) webchem_message("not_found") return(NA) + } + out <- out$result[[1]] + out <- matcher(out, match = match, query = query, verbose = verbose) + return(out) + } + else { + return(NA) } - out <- out$result[[1]] - out <- matcher(out, match = match, query = query, verbose = verbose) - return(out) } out <- lapply(query, foo, from = from, to = to, first = first, verbose = verbose) out <- setNames(out, query) - return(out) } @@ -185,7 +218,21 @@ cts_convert <- function(query, #' cts_from() #' } cts_from <- function(verbose = TRUE){ - tolower(fromJSON('http://cts.fiehnlab.ucdavis.edu/service/conversion/fromValues')) + + if (ping_service("cts") == FALSE) stop(webchem_message("service_down")) + + qurl <- "http://cts.fiehnlab.ucdavis.edu/service/conversion/fromValues" + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(NA) + } + out <- tolower(jsonlite::fromJSON(rawToChar(res$content))) + return(out) } @@ -208,5 +255,19 @@ cts_from <- function(verbose = TRUE){ #' cts_from() #' } cts_to <- function(verbose = TRUE){ - tolower(fromJSON('http://cts.fiehnlab.ucdavis.edu/service/conversion/toValues')) + + if (ping_service("cts") == FALSE) stop(webchem_message("service_down")) + + qurl <- "http://cts.fiehnlab.ucdavis.edu/service/conversion/toValues" + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(NA) + } + out <- tolower(jsonlite::fromJSON(rawToChar(res$content))) + return(out) } diff --git a/R/etox.R b/R/etox.R index 799166b3..1046aa78 100644 --- a/R/etox.R +++ b/R/etox.R @@ -47,6 +47,9 @@ get_etoxid <- function(query, from = c("name", "cas", "ec", "gsbl", "rtecs"), match = c("all", "best", "first", "ask", "na"), verbose = TRUE) { + + if (ping_service("etox") == FALSE) stop(webchem_message("service_down")) + clean_char <- function(x) { # rm \n \t x <- gsub("\n | \t", "", x) @@ -63,14 +66,11 @@ get_etoxid <- function(query, warning("match = 'best' only makes sense when querying chemical names. ") } foo <- function(query, from, match, verbose) { - on.exit(suppressWarnings(closeAllConnections())) - if (is.na(query)) { - empty <- list(query = NA, match = NA, etoxid = NA) - return(empty) + if (verbose) webchem_message("na") + return(tibble("query" = query, "match" = NA, "etoxid" = NA)) } - if (verbose) - message("Searching ", query) + if(verbose) webchem_message("query", query, appendLF = FALSE) baseurl <- "https://webetox.uba.de/webETOX/public/search/stoff.do" if (from == 'name') { body <- list("stoffname.selection[0].name" = query, @@ -86,33 +86,47 @@ get_etoxid <- function(query, 'stoffnummer.selection[0].type' = type, event = "Search") } - Sys.sleep(rgamma(1, shape = 15, scale = 1/10)) - h <- POST(url = baseurl, - handle = handle(''), - body = body) - tt <- read_html(h) - subs <- clean_char(xml_text(xml_find_all( - tt, "//*/table[@class = 'listForm resultList']//a")))[-1] - if (length(subs) == 0) { - if (verbose) - message("Substance not found! Returning NA.") - hit <- tibble("query" = query, - "match" = NA, - "etoxid" = NA) - return(hit) - } else { - links <- xml_attr(xml_find_all( - tt, "//*/table[@class = 'listForm resultList']//a"), "href")[-1] + Sys.sleep(stats::rgamma(1, shape = 15, scale = 1/10)) + h <- try(httr::RETRY("POST", + url = baseurl, + httr::user_agent(webchem_url()), + handle = handle(''), + body = body, + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(h, "try-error")) { + if (verbose) webchem_message("service_down") + return(tibble::tibble("query" = query, "match" = NA, "etoxid" = NA)) + } + if (verbose) message(httr::message_for_status(h)) + if (h$status_code == 200) { + tt <- read_html(h) + subs <- clean_char(xml_text(xml_find_all( + tt, "//*/table[@class = 'listForm resultList']//a")))[-1] + if (length(subs) == 0) { + if (verbose) webchem_message("not_found") + hit <- tibble("query" = query, + "match" = NA, + "etoxid" = NA) + return(hit) + } else { - subs_names <- gsub(" \\(.*\\)", "", subs) - id <- gsub("^.*\\?id=(.*)", "\\1", links) + links <- xml_attr(xml_find_all( + tt, "//*/table[@class = 'listForm resultList']//a"), "href")[-1] - out <- matcher(id, query = query, result = subs_names, match = match) + subs_names <- gsub(" \\(.*\\)", "", subs) + id <- gsub("^.*\\?id=(.*)", "\\1", links) - hit <- tibble("query" = query, - "match" = names(out), - "etoxid" = out) - return(hit) + out <- matcher(id, query = query, result = subs_names, match = match) + + hit <- tibble("query" = query, + "match" = names(out), + "etoxid" = out) + return(hit) + } + } + else { + return(tibble::tibble(query = NA, match = NA, etoxid = NA)) } } out <- lapply(query, foo, from = from, match = match, verbose = verbose) @@ -128,7 +142,6 @@ get_etoxid <- function(query, #' @import xml2 #' @importFrom rvest html_table #' @importFrom stats rgamma -#' #' @param id character; ETOX ID #' @param verbose logical; print message during processing to console? #' @@ -158,79 +171,91 @@ get_etoxid <- function(query, #' out <- etox_basic(ids) #' out #' -#' # extract ec numbers -#' sapply(out, function(y) y$ec) +#' # extract cas numbers +#' sapply(out, function(y) y$cas) #' } etox_basic <- function(id, verbose = TRUE) { - if (!mode(id) %in% c("numeric", "character")) { - stop("id must be a vector!") - } - # id <- c("20179", "9051") + + if (ping_service("etox") == FALSE) stop(webchem_message("service_down")) + foo <- function(id, verbose) { - on.exit(suppressWarnings(closeAllConnections())) if (is.na(id)) { - message('ID is NA! Returning NA.\n') + if (verbose) webchem_message("na") return(NA) } baseurl <- 'https://webetox.uba.de/webETOX/public/basics/stoff.do?language=en&id=' qurl <- paste0(baseurl, id) - if (verbose) - message('Querying ', qurl) - Sys.sleep(rgamma(1, shape = 15, scale = 1 / 10)) - tt <- try(read_html(qurl), silent = TRUE) - if (inherits(tt, 'try-error')) { - message('ID not found! Returning NA.\n') - return(NA) - } - tabs <- try(suppressWarnings(html_table(tt, fill = TRUE)), silent = TRUE) - if (inherits(tabs, 'try-error')) { - message('ID found. No data available. Returning NA.\n') + if(verbose) webchem_message("query", id, appendLF = FALSE) + Sys.sleep(stats::rgamma(1, shape = 15, scale = 1 / 10)) + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") return(NA) } - binf <- tabs[[length(tabs)]] - cas <- binf[, 1][binf[, 2] == 'CAS'] - ec <- binf[, 1][grepl('^EC$|EINEC', binf[, 2])] - gsbl <- binf[, 1][binf[, 2] == 'GSBL'] + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200) { + tt <- try(read_html(res), silent = TRUE) + if (inherits(tt, 'try-error')) { + webchem_message("not_found") + return(NA) + } + tabs <- try(suppressWarnings(html_table(tt, fill = TRUE)), silent = TRUE) + if (inherits(tabs, 'try-error')) { + webchem_message("not_found") + return(NA) + } + binf <- tabs[[length(tabs)]] + cas <- binf[, 1][binf[, 2] == 'CAS'] + ec <- binf[, 1][grepl('^EC$|EINEC', binf[, 2])] + gsbl <- binf[, 1][binf[, 2] == 'GSBL'] - syns <- tabs[[2]][c(1, 3, 4)] - colnames(syns) <- syns[1, ] - syns <- syns[-1, ] - syns <- syns[syns[ , 2] == 'SYNONYM' & !is.na(syns[ , 2]), ] - syns <- syns[ , -2] - names(syns) <- c('name', 'language') + syns <- tabs[[2]][c(1, 3, 4)] + colnames(syns) <- syns[1, ] + syns <- syns[-1, ] + syns <- syns[syns[ , 2] == 'SYNONYM' & !is.na(syns[ , 2]), ] + syns <- syns[ , -2] + names(syns) <- c('name', 'language') - out <- list(cas = cas, ec = ec, gsbl = gsbl, synonyms = syns, - source_url = qurl) - return(out) + out <- list(cas = cas, ec = ec, gsbl = gsbl, synonyms = syns, + source_url = qurl) + return(out) - # CODE FOR A POSSIBLE FUTURE RELEASE - # binf <- tabs[[length(tabs)]] - # cas <- binf[, 1][binf[, 2] == 'CAS'] - # ec <- binf[, 1][grepl('^EC$|EINEC', binf[, 2])] - # gsbl <- binf[, 1][binf[, 2] == 'GSBL'] - # - # syns <- tabs[[2]][c(1, 3, 4)] - # names(syns) <- tolower(gsub('\\s+', '_', names(syns))) - # group <- tolower(syns[ syns$substance_name_typ == 'GROUP_USE' & - # syns$language == 'English', ]$notation) - # syn <- syns[ syns$substance_name_typ == 'SYNONYM', ] - # syn <- syn[ ,-2] - # names(syn) <- c('name', 'language') - # # return list of data.frames - # l <- list(cas = cas, - # ec = ec, - # gsbl = gsbl, - # source_url = qurl) - # data <- as.data.frame(t(do.call(rbind, l)), - # stringsAsFactors = FALSE) - # chem_group <- as.data.frame(t(group), stringsAsFactors = FALSE) - # names(chem_group) <- chem_group[1, ] - # chem_group[1, ] <- TRUE - # out <- list(data = data, - # chemical_group = chem_group, - # synonyms = syn) - ### END - } + # CODE FOR A POSSIBLE FUTURE RELEASE + # binf <- tabs[[length(tabs)]] + # cas <- binf[, 1][binf[, 2] == 'CAS'] + # ec <- binf[, 1][grepl('^EC$|EINEC', binf[, 2])] + # gsbl <- binf[, 1][binf[, 2] == 'GSBL'] + # + # syns <- tabs[[2]][c(1, 3, 4)] + # names(syns) <- tolower(gsub('\\s+', '_', names(syns))) + # group <- tolower(syns[ syns$substance_name_typ == 'GROUP_USE' & + # syns$language == 'English', ]$notation) + # syn <- syns[ syns$substance_name_typ == 'SYNONYM', ] + # syn <- syn[ ,-2] + # names(syn) <- c('name', 'language') + # # return list of data.frames + # l <- list(cas = cas, + # ec = ec, + # gsbl = gsbl, + # source_url = qurl) + # data <- as.data.frame(t(do.call(rbind, l)), + # stringsAsFactors = FALSE) + # chem_group <- as.data.frame(t(group), stringsAsFactors = FALSE) + # names(chem_group) <- chem_group[1, ] + # chem_group[1, ] <- TRUE + # out <- list(data = data, + # chemical_group = chem_group, + # synonyms = syn) + ### END + } + else { + return(NA) + } + } out <- lapply(id, foo, verbose = verbose) out <- setNames(out, id) class(out) <- c('etox_basic', 'list') @@ -246,7 +271,6 @@ etox_basic <- function(id, verbose = TRUE) { #' @import xml2 RCurl #' @importFrom utils read.table #' @importFrom stats rgamma -#' #' @param id character; ETOX ID #' @param verbose logical; print message during processing to console? #' @@ -274,57 +298,69 @@ etox_basic <- function(id, verbose = TRUE) { #' #' } etox_targets <- function(id, verbose = TRUE) { - if (!mode(id) %in% c("numeric", "character")) { - stop("id must be a vector!") - } + + if (ping_service("etox") == FALSE) stop(webchem_message("service_down")) + foo <- function(id, verbose) { - on.exit(suppressWarnings(closeAllConnections())) if (is.na(id)) { - message('ID is NA! Returning NA.\n') + if (verbose) webchem_message("na") return(NA) } baseurl <- 'https://webetox.uba.de/webETOX/public/basics/stoff.do?language=en&id=' qurl <- paste0(baseurl, id) - if (verbose) - message('Querying ', qurl) - Sys.sleep(rgamma(1, shape = 15, scale = 1/10)) - tt <- try(read_html(qurl), silent = TRUE) - if (inherits(tt, 'try-error')) { - message('ID not found! Returning NA.\n') + if(verbose) webchem_message("query", id, appendLF = FALSE) + Sys.sleep(stats::rgamma(1, shape = 15, scale = 1/10)) + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") return(NA) } - link2 <- - xml_attrs(xml_find_all(tt, "//a[contains(.,'Quali') and contains(@href,'stoff')]"), - 'href') - id2 <- gsub('.*=(\\d+)', '\\1', link2) - - tt2 <- read_html(paste0('https://webetox.uba.de', link2, '&language=en')) - mssg <- - xml_text( - xml_find_all( - tt2, - "//div[contains(@class, 'messages')]/ul/li/span[contains(@class, 'message')]" - ), - trim = TRUE - ) - if (length(mssg) > 0) { - if (grepl('no result', mssg)) { - message('No targets found found! Returning NA.\n') + if (verbose) message(httr::message_for_status(res)) + if(res$status_code == 200){ + tt <- try(read_html(res), silent = TRUE) + if (inherits(tt, 'try-error')) { + if (verbose) webchem_message("not_found") return(NA) - } else { - message('Problem found! Message: \n ', mssg) } - } + link2 <- + xml_attrs(xml_find_all(tt, "//a[contains(.,'Quali') and contains(@href,'stoff')]"), + 'href') + id2 <- gsub('.*=(\\d+)', '\\1', link2) - csvlink <- xml_attr(xml_find_all(tt2, "//a[contains(.,'Csv')]"), 'href') - res <- read.table(paste0('https://webetox.uba.de', csvlink), - header = TRUE, sep = ',', dec = ',', fileEncoding = 'latin1', - stringsAsFactors = FALSE) - res$Value_Target_LR <- as.numeric(res$Value_Target_LR) - source_url <- paste0('https://webetox.uba.de', link2, '&language=en') - source_url <- gsub('^(.*ziel\\.do)(.*)(\\?stoff=.*)$', '\\1\\3', source_url) - out <- list(res = res, source_url = source_url) - return(out) + tt2 <- read_html(paste0('https://webetox.uba.de', link2, '&language=en')) + mssg <- + xml_text( + xml_find_all( + tt2, + "//div[contains(@class, 'messages')]/ul/li/span[contains(@class, 'message')]" + ), + trim = TRUE + ) + if (length(mssg) > 0) { + if (grepl('no result', mssg)) { + if (verbose) webchem_message("not_found") + return(NA) + } else { + if (verbose) message(paste0(" Problem found. Message: ", mssg)) + } + } + csvlink <- xml_attr(xml_find_all(tt2, "//a[contains(.,'Csv')]"), 'href') + res <- read.table(paste0('https://webetox.uba.de', csvlink), + header = TRUE, sep = ',', dec = ',', fileEncoding = 'latin1', + stringsAsFactors = FALSE) + res$Value_Target_LR <- as.numeric(res$Value_Target_LR) + source_url <- paste0('https://webetox.uba.de', link2, '&language=en') + source_url <- gsub('^(.*ziel\\.do)(.*)(\\?stoff=.*)$', '\\1\\3', source_url) + out <- list(res = res, source_url = source_url) + return(out) + } + else { + return(NA) + } } out <- lapply(id, foo, verbose = verbose) out <- setNames(out, id) @@ -340,7 +376,6 @@ etox_targets <- function(id, verbose = TRUE) { #' @import xml2 RCurl #' @importFrom utils read.table #' @importFrom stats rgamma -#' #' @param id character; ETOX ID #' @param verbose logical; print message during processing to console? #' @@ -362,51 +397,64 @@ etox_targets <- function(id, verbose = TRUE) { #' etox_tests( c("20179", "9051")) #' } etox_tests <- function(id, verbose = TRUE) { - if (!mode(id) %in% c("numeric","character")) { - stop("id must be a vector!") - } + + if (ping_service("etox") == FALSE) stop(webchem_message("service_down")) + foo <- function(id, verbose){ - on.exit(suppressWarnings(closeAllConnections())) if (is.na(id)) { - message('ID is NA! Returning NA.\n') + if (verbose) webchem_message("na") return(NA) } - # id <- '20179' baseurl <- 'https://webetox.uba.de/webETOX/public/basics/stoff.do?id=' qurl <- paste0(baseurl, id) - if (verbose) - message('Querying ', qurl) - Sys.sleep( rgamma(1, shape = 15, scale = 1/10)) - tt <- try(read_html(qurl), silent = TRUE) - if (inherits(tt, 'try-error')) { - message('ID not found! Returning NA.\n') + if(verbose) webchem_message("query", id, appendLF = FALSE) + Sys.sleep(stats::rgamma(1, shape = 15, scale = 1/10)) + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") return(NA) } - link2 <- xml_attrs(xml_find_all(tt, "//a[contains(.,'Tests') and contains(@href,'stoff')]"), 'href') - id2 <- gsub('.*=(\\d+)', '\\1', link2) - - tt2 <- read_html(paste0('https://webetox.uba.de', link2, '&language=en')) - mssg <- xml_text(xml_find_all(tt2, "//div[contains(@class, 'messages')]/ul/li/span[contains(@class, 'message')]"), trim = TRUE) - if (length(mssg) > 0) { - if (grepl('no result', mssg)) { - message('No targets found found! Returning NA.\n') + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200) { + tt <- try(read_html(res), silent = TRUE) + if (inherits(tt, 'try-error')) { + webchem_message("not_found") return(NA) - } else { - message('Problem found! Message: \n ', mssg) } - } - csvlink <- xml_attr(xml_find_all(tt2, "//a[contains(.,'Csv')]"), 'href') + link2 <- xml_attrs(xml_find_all(tt, "//a[contains(.,'Tests') and contains(@href,'stoff')]"), 'href') + id2 <- gsub('.*=(\\d+)', '\\1', link2) + tt2 <- read_html(paste0('https://webetox.uba.de', link2, '&language=en')) + mssg <- xml_text(xml_find_all(tt2, "//div[contains(@class, 'messages')]/ul/li/span[contains(@class, 'message')]"), trim = TRUE) + if (length(mssg) > 0) { + if (grepl('no result', mssg)) { + if (verbose) message(" No targets found. Returning NA.") + return(NA) + } else { + if (verbose) message(paste0(" Problem found. Message: ", mssg), + appendLF = FALSE) + } + } + csvlink <- xml_attr(xml_find_all(tt2, "//a[contains(.,'Csv')]"), 'href') - # csvlink <- gsub('^(.*\\.do).*$', '\\1', csvlink) - res <- read.table(paste0('https://webetox.uba.de', csvlink), - header = TRUE, sep = ',', dec = ',', fileEncoding = 'latin1', - stringsAsFactors = FALSE) - res$Value <- as.numeric(res$Value) + # csvlink <- gsub('^(.*\\.do).*$', '\\1', csvlink) + res2 <- read.table(paste0('https://webetox.uba.de', csvlink), + header = TRUE, sep = ',', dec = ',', + fileEncoding = 'latin1', + stringsAsFactors = FALSE) + res2$Value <- as.numeric(res2$Value) - source_url <- paste0('https://webetox.uba.de', link2, '&language=en') - source_url <- gsub('^(.*test\\.do)(.*)(\\?stoff=.*)$', '\\1\\3', source_url) - out <- list(res = res, source_url = source_url) - return(out) + source_url <- paste0('https://webetox.uba.de', link2, '&language=en') + source_url <- gsub('^(.*test\\.do)(.*)(\\?stoff=.*)$', '\\1\\3', source_url) + out <- list(res = res2, source_url = source_url) + return(out) + } + else { + return(NA) + } } out <- lapply(id, foo, verbose = verbose) out <- setNames(out, id) diff --git a/R/flavornet.R b/R/flavornet.R index d0f4523b..273f844c 100644 --- a/R/flavornet.R +++ b/R/flavornet.R @@ -5,7 +5,6 @@ #' #' @import xml2 #' @importFrom stats rgamma -#' #' @param query character; CAS number to search by. See \code{\link{is.cas}} for correct formatting #' @param from character; currently only CAS numbers are accepted. #' @param verbose logical; should a verbose output be printed on the console? @@ -28,29 +27,44 @@ fn_percept <- function(query, from = "cas", verbose = TRUE, CAS, ...) { + + if (ping_service("fn") == FALSE) stop(webchem_message("service_down")) + if (!missing(CAS)) { - warning('"CAS" is now deprecated. Please use "query" instead. ') + message('"CAS" is now deprecated. Please use "query" instead. ') query <- CAS } match.arg(from) foo <- function (query, verbose){ - if (is.na(query)) return(NA) - on.exit(suppressWarnings(closeAllConnections())) + if (is.na(query)) { + if (verbose) webchem_message("na") + return(NA) + } qurl <- paste0("http://www.flavornet.org/info/",query,".html") - if (verbose) - message(qurl) - Sys.sleep(rgamma(1, shape = 10, scale = 1/10)) - h <- try(read_html(qurl), silent = TRUE) - if (inherits(h, "try-error")) { - warning("CAS not found... Returning NA.") + if(verbose) webchem_message("query", query, appendLF = FALSE) + Sys.sleep(stats::rgamma(1, shape = 10, scale = 1/10)) + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(NA) + } + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200){ + h <- read_html(res) + doc.text = xml_text(xml_find_all(h, "/html/body/p[3]")) + pattern = "Percepts: " + percept <- gsub(pattern, "", doc.text) + return(percept) + } + else { return(NA) } - doc.text = xml_text(xml_find_all(h, "/html/body/p[3]")) - pattern = "Percepts: " - percept <- gsub(pattern, "", doc.text) - return(percept) } percepts <- sapply(query, foo, verbose = verbose) percepts <- setNames(percepts, query) return(percepts) -} \ No newline at end of file +} diff --git a/R/integration.R b/R/integration.R index 9b406cb5..c61cb086 100644 --- a/R/integration.R +++ b/R/integration.R @@ -136,4 +136,4 @@ find_db <- function(query, from, } } return(out) -} \ No newline at end of file +} diff --git a/R/nist.R b/R/nist.R index e58054a8..69517887 100644 --- a/R/nist.R +++ b/R/nist.R @@ -5,6 +5,7 @@ #' @param type what kind of RI #' @param polarity polar or non-polar #' @param temp_prog what kind of temperature program +#' @param verbose logical; should a verbose output be printed on the console? #' @noRd #' @import rvest #' @import xml2 @@ -15,8 +16,10 @@ get_ri_xml <- from, type, polarity, - temp_prog) { - on.exit(suppressWarnings(closeAllConnections())) + temp_prog, + verbose) { + + if (ping_service("nist") == FALSE) stop(webchem_message("service_down")) from_str <- (switch( from, @@ -30,6 +33,7 @@ get_ri_xml <- #handle NAs if (is.na(query)) { + if (verbose) webchem_message("na") return(NA) } else { if (from == "cas") { @@ -37,69 +41,91 @@ get_ri_xml <- } else { qurl <- paste0(baseurl, "?", from_str, "=", query, "&Units=SI") Sys.sleep(rgamma(1, shape = 15, scale = 1/10)) - page <- httr::with_config( - user_agent('webchem (https://github.com/ropensci/webchem)'), - xml2::read_html(qurl) - ) - - #Warnings - result <- page %>% - html_node("main h1") %>% - html_text() - # if cquery not found - if (stringr::str_detect(result, "Not Found")) { - warning(paste0("'", query, "' not found. Returning NA.")) - ri_xml <- tibble(query = query) - } - # if more than one compound found - if (result == "Search Results") { - warning(paste0("More than one match for '", query, - "'. Returning NA.")) + if (verbose) webchem_message("query", query, appendLF = FALSE) + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") return(NA) } - links <- - page %>% - rvest::html_nodes("li li a") %>% - rvest::html_attr("href") + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200) { + page <- xml2::read_html(res) + + #Warnings + result <- page %>% + html_node("main h1") %>% + html_text() + # if cquery not found + if (stringr::str_detect(result, "Not Found")) { + if (verbose) webchem_message("not_found", appendLF = FALSE) + ri_xml <- tibble(query = query) + } + # if more than one compound found + if (result == "Search Results") { + message(paste0(" More than one match for '", query, + "'. Returning NA.")) + return(NA) + } + links <- + page %>% + rvest::html_nodes("li li a") %>% + rvest::html_attr("href") - gaschrom <- links[which(regexpr("Gas-Chrom", links) >= 1)] + gaschrom <- links[which(regexpr("Gas-Chrom", links) >= 1)] - if (length(gaschrom) == 0) { - warning(paste0( - "There are no chromatography data for '", - query, - "'. Returning NA." - )) + if (length(gaschrom) == 0) { + if (verbose) webchem_message("not_available") + return(NA) + } else { + if (verbose) message(" CAS found. ", appendLF = FALSE) + ID <- stringr::str_extract(gaschrom, "(?<=ID=).+?(?=&)") + } + } + else { return(NA) - } else { - ID <- stringr::str_extract(gaschrom, "(?<=ID=).+?(?=&)") } } #scrape RI table type_str <- toupper(paste(type, "RI", polarity, temp_prog, sep = "-")) qurl <- paste0(baseurl, "?ID=", ID, "&Units-SI&Mask=2000&Type=", type_str) - Sys.sleep(rgamma(1, shape = 15, scale = 1/10)) - page <- - httr::with_config( - user_agent('webchem (https://github.com/ropensci/webchem)'), - xml2::read_html(qurl) - ) - ri_xml.all <- html_nodes(page, ".data") + if (verbose) { + if (from == "cas") { + webchem_message("query", query, appendLF = FALSE) + } + else { + webchem_message("query", ID, appendLF = FALSE) + } + } + res2 <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res2, "try-error")) { + if (verbose) webchem_message("service_down") + return(NA) + } + if (verbose) message(httr::message_for_status(res2)) + if (res2$status_code == 200) { + page <- xml2::read_html(res2) + ri_xml.all <- html_nodes(page, ".data") - #Warn if table doesn't exist at URL - if (length(ri_xml.all) == 0) { - warning(paste0( - "There are no RIs for ", - query, - " of type ", - type_str, - ". Returning NA." - )) + #message if table doesn't exist at URL + if (length(ri_xml.all) == 0) { + if (verbose) webchem_message("not_available") + return(NA) + } else { + ri_xml <- ri_xml.all + } + } + else { return(NA) - } else { - ri_xml <- ri_xml.all } } #set attributes to label what type of RI @@ -251,6 +277,7 @@ tidy_ritable <- function(ri_xml) { #' @param temp_prog Temperature program. One of \code{"isothermal"}, #' \code{"ramp"}, or \code{"custom"}. #' @param cas deprecated. Use \code{query} instead. +#' @param verbose logical; should a verbose output be printed on the console? #' @details The types of retention indices included in NIST include Kovats #' (\code{"kovats"}), Van den Dool and Kratz (\code{"linear"}), normal alkane #' (\code{"alkane"}), and Lee (\code{"lee"}). Details about how these are @@ -293,7 +320,8 @@ nist_ri <- function(query, type = c("kovats", "linear", "alkane", "lee"), polarity = c("polar", "non-polar"), temp_prog = c("isothermal", "ramp", "custom"), - cas = NULL) { + cas = NULL, + verbose = TRUE) { if (!is.null(cas)) { warning("`cas` is deprecated. Using `query` instead with `from = 'cas'`.") @@ -314,7 +342,8 @@ nist_ri <- function(query, from = from, type = type, polarity = polarity, - temp_prog = temp_prog + temp_prog = temp_prog, + verbose = verbose ) ) @@ -324,6 +353,5 @@ nist_ri <- function(query, ri_tables <- purrr::map_dfr(ri_xmls, tidy_ritable, .id = "query") %>% dplyr::mutate(query = na_if(query, ".NA")) - return(ri_tables) -} \ No newline at end of file +} diff --git a/R/opsin.R b/R/opsin.R index a8726363..63fab53d 100644 --- a/R/opsin.R +++ b/R/opsin.R @@ -25,40 +25,50 @@ #' @export opsin_query <- function(query, verbose = TRUE, ...){ - # query <- 'cyclopropane' - foo <- function(query, verbose){ - on.exit(suppressWarnings(closeAllConnections())) + if (ping_service("opsin") == FALSE) stop(webchem_message("service_down")) + foo <- function(query, verbose){ empty <- c(query, rep(NA, 6)) names(empty) <- c("query", "inchi", "stdinchi", "stdinchikey", "smiles", "message", "status") empty <- as_tibble(t(empty)) if (is.na(query)) { + if (verbose) webchem_message("na") return(empty) } query_u <- URLencode(query, reserved = TRUE) baseurl <- "http://opsin.ch.cam.ac.uk/opsin/" out <- 'json' qurl <- paste0(baseurl, query_u, '.', out) - if (verbose) - message('Querying ', URLdecode(query_u)) + if (verbose) webchem_message("query", query, appendLF = FALSE) Sys.sleep( rgamma(1, shape = 5, scale = 1/10)) - h <- try(GET(qurl), silent = TRUE) - if (inherits(h, "try-error")) { - warning('Problem with web service encountered... Returning NA.') + res <- try(httr::RETRY("GET", + qurl, + user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") return(empty) } - cont <- content(h, as = 'text') - if (substr(cont, 1, 14) == ' ' - - Sys.sleep(rgamma(1, shape = 5, scale = 1/10)) - res <- try(POST(baseurl, - add_headers(headers), - body = body, - user_agent("webchem (https://github.com/ropensci/webchem)"))) - if (inherits(res, "try-error")) + res <- try(httr::RETRY("POST", + baseurl, + add_headers(headers), + body = body, + httr::user_agent(webchem_url()), + terminate_on = 400, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { return(FALSE) - res$status_code == 200 + } + return(res$status_code == 200) } @@ -173,10 +183,17 @@ ping_pubchem <- function(...) { input <- paste0('/compound/', from) output <- '/synonyms/JSON' qurl <- paste0(prolog, input, output) - - res <- POST(qurl, body = paste0(from, '=', query), ...) - stopifnot(is(res, "response")) - res$status_code == 200 + res <- try(httr::RETRY("POST", + qurl, + body = paste0(from, '=', query), + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE, + ...), silent = TRUE) + if (inherits(res, "try-error")) { + return(FALSE) + } + return(res$status_code == 200) } # pubchem PUG-VIEW----------------------------------------------------------------- @@ -191,7 +208,13 @@ ping_pubchem <- function(...) { ping_pubchem_pw <- function(...) { qurl <- paste("https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data", "compound/176/JSON?heading=pka", sep = "/") - res <- POST(qurl, - user_agent("webchem (https://github.com/ropensci/webchem)")) - res$status_code == 200 + res <- try(httr::RETRY("POST", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + return(FALSE) + } + return(res$status_code == 200) } diff --git a/R/pubchem.R b/R/pubchem.R index d8bed8ef..38256c67 100644 --- a/R/pubchem.R +++ b/R/pubchem.R @@ -123,6 +123,9 @@ get_cid <- arg = NULL, first = NULL, ...) { + + if (ping_service("pc") == FALSE) stop(webchem_message("service_down")) + #deprecate `first` if (!is.null(first) && first == TRUE) { message("`first = TRUE` is deprecated. Use `match = 'first'` instead") @@ -171,12 +174,10 @@ get_cid <- match <- match.arg(match) foo <- function(query, from, domain, match, verbose, arg, ...) { if (is.na(query)) { - if (verbose) message(paste0(query, " is invalid. Returning NA.")) - return(NA) - } - if (verbose) { - message(paste0("Querying ", query, ". "), appendLF = FALSE) + if (verbose) webchem_message("na") + return(tibble::tibble("query" = NA, "cid" = NA)) } + if (verbose) webchem_message("query", query, appendLF = FALSE) if (is.character(query)) query <- URLencode(query, reserved = TRUE) if (from %in% structure_search) { qurl <- paste("https://pubchem.ncbi.nlm.nih.gov/rest/pug", @@ -191,11 +192,23 @@ get_cid <- if (from == "inchi") { qurl <- paste("https://pubchem.ncbi.nlm.nih.gov/rest/pug", domain, from, "cids", "json", sep = "/") - res <- httr::POST(qurl, body = paste0("inchi=", query), - user_agent("webchem")) + res <- try(httr::RETRY("POST", + qurl, + user_agent(webchem_url()), + body = paste0("inchi=", query), + terminate_on = 404, + quiet = TRUE), silent = TRUE) } else { - res <- httr::POST(qurl, user_agent("webchem")) + res <- try(httr::RETRY("POST", + qurl, + user_agent(webchem_url()), + terminate_on = c(202, 404), + quiet = TRUE), silent = TRUE) + } + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(tibble::tibble("query" = query, "cid" = NA)) } if (res$status_code != 200) { if (res$status_code == 202) { @@ -205,16 +218,24 @@ get_cid <- "listkey", listkey, "cids", "json", sep = "/") while (res$status_code == 202) { Sys.sleep(5 + rgamma(1, shape = 15, scale = 1 / 10)) - res <- httr::POST(qurl, user_agent("webchem")) + res <- try(httr::RETRY("POST", + qurl, + user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(tibble::tibble("query" = query, "cid" = NA)) + } } if (res$status_code != 200) { if (verbose) message(httr::message_for_status(res)) - return(NA) + return(tibble::tibble("query" = query, "cid" = NA)) } } else{ if (verbose) message(httr::message_for_status(res)) - return(NA) + return(tibble::tibble("query" = query, "cid" = NA)) } } if (verbose) message(httr::message_for_status(res)) @@ -231,16 +252,12 @@ get_cid <- out <- unique(unlist(cont)) out <- matcher(x = out, query = query, match = match, verbose = verbose) out <- as.character(out) - names(out) <- NULL - return(out) + return(tibble::tibble("query" = query, "cid" = out)) } out <- map(query, ~foo(query = .x, from = from, domain = domain, match = match, verbose = verbose, arg = arg)) - out <- setNames(out, query) - out <- - lapply(out, enframe, name = NULL, value = "cid") %>% - bind_rows(.id = "query") + out <- dplyr::bind_rows(out) return(out) } @@ -299,7 +316,13 @@ get_cid <- #' "CanonicalSMILES")) #' } pc_prop <- function(cid, properties = NULL, verbose = TRUE, ...) { - # cid <- c("5564", "7843") + + if (ping_service("pc") == FALSE) stop(webchem_message("service_down")) + + if (mean(is.na(cid)) == 1) { + if (verbose) webchem_message("na") + return(NA) + } napos <- which(is.na(cid)) cid_o <- cid cid <- cid[!is.na(cid)] @@ -326,44 +349,51 @@ pc_prop <- function(cid, properties = NULL, verbose = TRUE, ...) { output <- paste0("/property/", properties, "/JSON") qurl <- paste0(prolog, input, output) - if (verbose) - message(qurl) + if (verbose) webchem_message("query_all", appendLF = FALSE) Sys.sleep(0.2) - cont <- try(content(POST(qurl, - body = list("cid" = paste(cid, collapse = ",") - )), - type = "text", encoding = "UTF-8"), - silent = TRUE - ) - if (inherits(cont, "try-error")) { - warning("Problem with web service encountered... Returning NA.") + res <- try(httr::RETRY("POST", + qurl, + httr::user_agent(webchem_url()), + body = list("cid" = paste(cid, collapse = ",")), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") return(NA) } - cont <- fromJSON(cont) - if (names(cont) == "Fault") { - warning(cont$Fault$Message, ". ", cont$Fault$Details, ". Returning NA.") + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200) { + cont <- jsonlite::fromJSON(rawToChar(res$content)) + if (names(cont) == "Fault") { + if (verbose) { + message(cont$Fault$Message, ". ", cont$Fault$Details, ". Returning NA.") + } + return(NA) + } + out <- cont$PropertyTable[[1]] + # insert NA rows + narow <- rep(NA, ncol(out)) + for (i in seq_along(napos)) { + #capture NAs at beginning + firstnna <- min(which(!is.na(cid_o))) + if (napos[i] < firstnna) { + out <- rbind(narow, out) + } else { + # capture NAs at end + if (napos[i] > nrow(out)) { + # print(napos[i]) + out <- rbind(out, narow) + } else { + out <- rbind(out[1:(napos[i] - 1), ], narow, out[napos[i]:nrow(out), ]) + } + }} + rownames(out) <- NULL + class(out) <- c("pc_prop", "data.frame") + return(out) + } + else { return(NA) } - out <- cont$PropertyTable[[1]] - # insert NA rows - narow <- rep(NA, ncol(out)) - for (i in seq_along(napos)) { - #capture NAs at beginning - firstnna <- min(which(!is.na(cid_o))) - if (napos[i] < firstnna) { - out <- rbind(narow, out) - } else { - # capture NAs at end - if (napos[i] > nrow(out)) { - # print(napos[i]) - out <- rbind(out, narow) - } else { - out <- rbind(out[1:(napos[i] - 1), ], narow, out[napos[i]:nrow(out), ]) - } - }} - rownames(out) <- NULL - class(out) <- c("pc_prop", "data.frame") - return(out) } #' Search synonyms in pubchem @@ -420,6 +450,9 @@ pc_synonyms <- function(query, match = c("all", "first", "ask", "na"), verbose = TRUE, arg = NULL, choices = NULL, ...) { + + if (ping_service("pc") == FALSE) stop(webchem_message("service_down")) + # from can be cid | name | smiles | inchi | sdf | inchikey | formula # query <- c("Aspirin") # from = "name" @@ -428,33 +461,43 @@ pc_synonyms <- function(query, if (!missing("choices")) stop("'choices' is deprecated. Use 'match' instead.") foo <- function(query, from, verbose, ...) { - if (is.na(query)) return(NA) + if (is.na(query)) { + if (verbose) webchem_message("na") + return(NA) + } prolog <- "https://pubchem.ncbi.nlm.nih.gov/rest/pug" input <- paste0("/compound/", from) output <- "/synonyms/JSON" if (!is.null(arg)) arg <- paste0("?", arg) qurl <- paste0(prolog, input, output, arg) - if (verbose) - message(qurl) + if (verbose) webchem_message("query", query, appendLF = FALSE) Sys.sleep(0.2) - cont <- try(content(POST(qurl, - body = paste0(from, "=", query) - )), silent = TRUE - ) - if (inherits(cont, "try-error")) { - warning("Problem with web service encountered... Returning NA.") + res <- try(httr::RETRY("POST", + qurl, + httr::user_agent(webchem_url()), + body = paste0(from, "=", query), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") return(NA) } - if (names(cont) == "Fault") { - warning(cont$Fault$Details, ". Returning NA.") + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200){ + cont <- httr::content(res) + if (names(cont) == "Fault") { + message(cont$Fault$Details, ". Returning NA.") + return(NA) + } + out <- unlist(cont)[-1] #first result is always an ID number + names(out) <- NULL + + out <- matcher(out, query = query, match = match, verbose = verbose) + } + else { return(NA) } - out <- unlist(cont)[-1] #first result is always an ID number - names(out) <- NULL - - out <- matcher(out, query = query, match = match, verbose = verbose) - } out <- lapply(query, foo, from = from, verbose = verbose) out <- setNames(out, query) @@ -509,7 +552,6 @@ pc_synonyms <- function(query, #' pc_sect(780286, "modify date", "assay") #' pc_sect(9023, "Ensembl ID", "gene") #' pc_sect("1ZHY_A", "Sequence", "protein") -#' pc_sect("US2013040379", "Patent Identifier Synonyms", "patent") #' } #' @export pc_sect <- function(id, @@ -565,24 +607,31 @@ pc_page <- function(id, domain = c("compound", "substance", "assay", "gene", "protein", "patent"), verbose = TRUE) { + + if (ping_service("pc") == FALSE) stop(webchem_message("service_down")) + domain <- match.arg(domain) section <- tolower(gsub(" +", "+", section)) foo <- function(id, section, domain) { - qurl <- paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/", - domain, "/", id, "/JSON?heading=", section) - if (verbose == TRUE) message("Searching ", id, ". ", appendLF = FALSE) if (is.na(id)) { - if (verbose == TRUE) { - message("Invalid input. Returning NA.") - } + if (verbose) webchem_message("na") return(NA) } + qurl <- paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/", + domain, "/", id, "/JSON?heading=", section) + if (verbose) webchem_message("query", id, appendLF = FALSE) Sys.sleep(0.3 + stats::rexp(1, rate = 10 / 0.3)) - res <- httr::POST( - qurl, - user_agent("webchem (https://github.com/ropensci/webchem)")) - if (res$status_code < 300) { - if (verbose == TRUE) message(httr::message_for_status(res)) + res <- try(httr::RETRY("POST", + qurl, + user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(NA) + } + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200) { cont <- httr::content(res, type = "text", encoding = "UTF-8") cont <- jsonlite::fromJSON(cont, simplifyDataFrame = FALSE) tree <- data.tree::as.Node(cont, nameName = "TOCHeading") @@ -590,9 +639,6 @@ pc_page <- function(id, return(tree) } else { - if (verbose == TRUE) { - message(paste0(httr::message_for_status(res), " Returning NA.")) - } return(NA) } } diff --git a/R/srs.R b/R/srs.R index e64605ed..d1f4c4a0 100644 --- a/R/srs.R +++ b/R/srs.R @@ -5,6 +5,7 @@ #'@param query character; query ID. #'@param from character; type of query ID, e.g. \code{'itn'} , \code{'cas'}, #' \code{'epaid'}, \code{'tsn'}, \code{'name'}. +#'@param verbose logical; should a verbose output be printed on the console? #'@param ... not currently used. #'@return a list of lists (for each supplied query): a list of 22. subsKey, #' internalTrackingNumber, systematicName, epaIdentificationNumber, @@ -26,22 +27,41 @@ #' } srs_query <- function(query, - from = c("itn", "cas", "epaid", "tsn", "name"), ...) { + from = c("itn", "cas", "epaid", "tsn", "name"), + verbose = TRUE, ...) { + + if (ping_service("srs") == FALSE) stop(webchem_message("service_down")) + + from <- match.arg(from) entity_url <- "https://cdxnodengn.epa.gov/cdx-srs-rest/" rst <- lapply(query, function(x) { + if (is.na(x)){ + if (verbose) webchem_message("na") + return(NA) + } entity_query <- paste0(entity_url, "/substance/", from, "/", x) - response <- httr::GET(entity_query) - + if (verbose) webchem_message("query", x, appendLF = FALSE) + response <- try(httr::RETRY("GET", + entity_query, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(response, "try-error")) { + if (verbose) webchem_message("service_down") + return(NA) + } + if (verbose) message(httr::message_for_status(response)) if (response$status_code == 200) { text_content <- httr::content(response, "text") if (text_content == "[]") { + if (verbose) webchem_message("not_available") return(NA) } else { jsonlite::fromJSON(text_content) } } else { - stop(httr::http_status(response)$message) + return(NA) } }) names(rst) <- query diff --git a/R/utils.R b/R/utils.R index 87568f0b..a585c6b8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -67,21 +67,38 @@ is.inchikey = function(x, type = c('format', 'chemspider'), verbose = TRUE) { #' is.inchikey_cs('BQJCRHHNABKAKU-KBQPJGBKSB-N') #' } is.inchikey_cs <- function(x, verbose = TRUE){ - # x <- 'BQJCRHHNABKAKU-KBQPJGBKSA' + + if (ping_service("cs_web") == FALSE) stop(webchem_message("service_down")) + if (length(x) > 1) { stop('Cannot handle multiple input strings.') } + if (is.na(x)) { + if (verbose) webchem_message("na") + return(NA) + } baseurl <- 'http://www.chemspider.com/InChI.asmx/IsValidInChIKey?' qurl <- paste0(baseurl, 'inchi_key=', x) Sys.sleep(0.1) - h <- try(read_xml(qurl), silent = TRUE) - if (inherits(h, "try-error")) { - warning('Problem with webservice... Returning NA.') - out <- NA - } else { + if (verbose) webchem_message("query", x, appendLF = FALSE) + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(NA) + } + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200){ + h <- xml2::read_xml(res) out <- as.logical(xml_text(h)) + return(out) + } + else { + return(NA) } - return(out) } @@ -461,28 +478,22 @@ matcher <- if(length(x) == 1) { return(x) } else { - if(verbose) { - message("More then one Link found for '", query, "'. \n") - } + if(verbose) message(" Multiple found. ", appendLF = FALSE) if(match == "all") { - if(verbose) { - message("Returning all matches. \n") - } + if(verbose) message("Returning all.") return(x) + } - } else if (match == "best") { + else if (match == "best") { #check that x and result are same length if(length(x) != length(result)) stop("Can't use match = 'best' without query matches for each output") - if (verbose) { - message("Returning best match. \n") - } + if (verbose) message("Returning best.") dd <- adist(query, result) / nchar(result) return(x[which.min(dd)]) } else if (match == "first") { - if (verbose) - message("Returning first match. \n") + if (verbose) message("Returning first.") return(x[1]) } else if (match == "ask" & interactive()) { @@ -495,12 +506,45 @@ matcher <- return(x[pick]) } else if (match == "na") { - if (verbose) { - message("Returning NA. \n") - } + if (verbose) message("Returning NA.") x <- NA names(x)<-NA return(x) } } } + +#' Webchem messages +#' +#' Webchem spacific messages to be used in verbose messages. +#' @noRd +webchem_message <- function(action = c("na", + "query", + "query_all", + "not_found", + "not_available", + "service_down"), + appendLF = TRUE, + ...) { + action <- match.arg(action) + string <- switch( + action, + na = "Query is NA. Returning NA.", + query = paste0("Querying ", ..., ". "), + query_all = "Querying. ", + not_found = " Not found. Returning NA.", + not_available = " Not available. Returning NA.", + service_down = " Service not available. Returning NA." + ) + message(string, appendLF = FALSE) + if (appendLF) message("") +} + +#' Webchem URL +#' +#' URL of the webchem package to be used in httr::user_agent() +#' @noRd +webchem_url <- function() { + url <- "https://cran.r-project.org/web/packages/webchem/index.html" + return(url) +} diff --git a/R/wikidata.R b/R/wikidata.R index 3b0ca7f4..366a010a 100644 --- a/R/wikidata.R +++ b/R/wikidata.R @@ -40,12 +40,15 @@ get_wdid <- verbose = TRUE, language = 'en') { + if (ping_service("wd") == FALSE) stop(webchem_message("service_down")) + # language <- 'en' # query <- 'Triclosan' match <- match.arg(match) foo <- function(query, language, match, verbose){ if (is.na(query)){ + if (verbose) webchem_message("na") id <- NA matched_sub <- NA } else { @@ -54,44 +57,56 @@ get_wdid <- qurl <- paste0("wikidata.org/w/api.php?action=wbsearchentities&format=json&type=item") qurl <- paste0(qurl, "&language=", language, "&limit=", limit, "&search=", query1) - if (verbose) - message('Querying ', qurl) + if (verbose) webchem_message("query", query, appendLF = FALSE) Sys.sleep(0.3) - cont <- - fromJSON(content(GET( - qurl, - user_agent('webchem (https://github.com/ropensci/webchem)') - ), 'text')) - search <- cont$search - if (length(search) == 0) { - if (verbose) - message('Substance not found! Returing NA. \n') - id <- NA - matched_sub <- NA - } else { - # use only matches on label - search <- search[search$match$type %in% c('label', 'alias'), ] - # # check matches - search <- search[tolower(iconv(search$match$text, - "latin1", - "ASCII", - sub = "")) == tolower(query), ] - - if (nrow(search) > 1) { - id <- - matcher( - search$id, - query = query, - result = search$label, - match = match, - verbose = verbose - ) - matched_sub <- names(id) + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(tibble::tibble(query = query, match = NA, wdid = NA)) + } + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200) { + cont <- jsonlite::fromJSON(httr::content(res, + type = "text", + encoding = "utf-8")) + search <- cont$search + if (length(search) == 0) { + if (verbose) webchem_message("not_found") + id <- NA + matched_sub <- NA } else { - id <- search$id - matched_sub <- search$label + # use only matches on label + search <- search[search$match$type %in% c('label', 'alias'), ] + # # check matches + search <- search[tolower(iconv(search$match$text, + "latin1", + "ASCII", + sub = "")) == tolower(query), ] + + if (nrow(search) > 1) { + id <- + matcher( + search$id, + query = query, + result = search$label, + match = match, + verbose = verbose + ) + matched_sub <- names(id) + } else { + id <- search$id + matched_sub <- search$label + } } } + else { + id <- NA + matched_sub <- NA + } } out <- tibble(query = query, match = matched_sub, wdid = id) return(out) @@ -142,16 +157,19 @@ get_wdid <- #' wd_ident(id) #' } wd_ident <- function(id, verbose = TRUE){ + + if (ping_service("wd") == FALSE) stop(webchem_message("service_down")) + # id <- c( "Q163648", "Q18216") # id <- 'Q408646' foo <- function(id, verbose){ + empty <- as.list(rep(NA, 13)) + names(empty) <- c("smiles", "cas", "cid", "einecs", "csid", "inchi", + "inchikey", "drugbank", "zvg", "chebi", "chembl", "unii", + "source_url") if (is.na(id)) { - if (verbose) - message('NA as input! Returing NA. \n') - out <- as.list(rep(NA, 13)) - names(out) <- c("smiles", "cas", "cid", "einecs", "csid", "inchi", "inchikey", - "drugbank", "zvg", "chebi", "chembl", "unii", "source_url") - return(out) + if (verbose) webchem_message("na") + return(empty) } baseurl <- 'https://query.wikidata.org/sparql?format=json&query=' props <- c('P233', 'P231', 'P662', 'P232', 'P661', 'P234', 'P235', 'P715', 'P679', @@ -168,38 +186,50 @@ wd_ident <- function(id, verbose = TRUE){ qurl <- paste0(baseurl, sparql) qurl <- URLencode(qurl) Sys.sleep( rgamma(1, shape = 15, scale = 1/10)) - if (verbose) - message('Querying ', qurl) - res <- GET(qurl) - tmp <- fromJSON(content(res, as = "text")) - - vars_out <- tmp$head$vars - out <- tmp$results$bindings - - if (length(out) == 0) { - if (verbose) - message('Not found! Returing NA. \n') - out <- as.list(rep(NA, 13)) - names(out) <- c(vars_out, 'source_url') - return(out) + if (verbose) webchem_message("query", id, appendLF = FALSE) + res <- try(httr::RETRY("GET", + qurl, + httr::user_agent(webchem_url()), + terminate_on = 404, + quiet = TRUE), silent = TRUE) + if (inherits(res, "try-error")) { + if (verbose) webchem_message("service_down") + return(empty) } + if (verbose) message(httr::message_for_status(res)) + if (res$status_code == 200) { + tmp <- fromJSON(content(res, as = "text")) - if (nrow(out) > 1) { - warning("More then one unique entry found! Returning first.") - out <- out[1, ] - } + vars_out <- tmp$head$vars + out <- tmp$results$bindings + + if (length(out) == 0) { + if (verbose) webchem_message("not_found") + out <- as.list(rep(NA, 13)) + names(out) <- c(vars_out, 'source_url') + return(out) + } + + if (nrow(out) > 1) { + message("More then one unique entry found! Returning first.") + out <- out[1, ] + } - out <- lapply(out, '[[', 'value') + out <- lapply(out, '[[', 'value') - # check for missing entries and add to out-list - miss <- names[!names %in% names(out)] - for (i in miss) { - out[[i]] <- NA + # check for missing entries and add to out-list + miss <- names[!names %in% names(out)] + for (i in miss) { + out[[i]] <- NA + } + out <- out[names] + out[['source_url']] <- paste0('https://www.wikidata.org/wiki/', id) + out <- unlist(out) + return(out) + } + else { + return(empty) } - out <- out[names] - out[['source_url']] <- paste0('https://www.wikidata.org/wiki/', id) - out <- unlist(out) - return(out) } # ugly fixing to return data.frame out <- data.frame(t(sapply(id, foo,verbose = verbose)), stringsAsFactors = FALSE, row.names = seq_along(id)) diff --git a/man/aw_query.Rd b/man/aw_query.Rd index 3cdc9cb2..ffe8e3a9 100644 --- a/man/aw_query.Rd +++ b/man/aw_query.Rd @@ -4,14 +4,7 @@ \alias{aw_query} \title{Query http://www.alanwood.net/pesticides} \usage{ -aw_query( - query, - from = c("name", "cas"), - verbose = TRUE, - force_build = FALSE, - type, - ... -) +aw_query(query, from = c("name", "cas"), verbose = TRUE, type, ...) } \arguments{ \item{query}{character; search string} @@ -20,12 +13,9 @@ aw_query( \item{verbose}{logical; print message during processing to console?} -\item{force_build}{logical; force building a new index? See -\code{\link{build_aw_idx}} for more details.} - \item{type}{deprecated} -\item{...}{currently unused.} +\item{...}{additional arguments to internal utility functions} } \value{ A list of eight entries: common-name, status, preferred IUPAC Name, @@ -59,9 +49,6 @@ Ralf B. Schäfer (2020). webchem: An R Package to Retrieve Chemical Information from the Web. Journal of Statistical Software, 93(13). . } -\seealso{ -\code{\link{build_aw_idx}} -} \author{ Eduard Szöcs, \email{eduardszoecs@gmail.com} } diff --git a/man/build_aw_idx.Rd b/man/build_aw_idx.Rd deleted file mode 100644 index d8075096..00000000 --- a/man/build_aw_idx.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/alanwood.R -\name{build_aw_idx} -\alias{build_aw_idx} -\title{Function to build index} -\source{ -\url{http://www.alanwood.net/pesticides} -} -\usage{ -build_aw_idx(verbose = TRUE, force_build = FALSE) -} -\arguments{ -\item{verbose}{logical; print message during processing to console?} - -\item{force_build}{logical; force building a new index?} -} -\value{ -a data.frame -} -\description{ -This function builds an index of Alan Woods Compendium of Pesticides -\url{http://www.alanwood.net/pesticides} and saves it to -\code{\link{tempdir}}. This is a utility function for -\code{\link{aw_query}} and will not be exported in future releases. -} -\seealso{ -\code{\link{aw_query}}, \code{\link{tempdir}} -} -\author{ -Eduard Szöcs, \email{eduardszoecs@gmail.com} -} diff --git a/man/etox_basic.Rd b/man/etox_basic.Rd index 53b6985e..a3d95bf4 100644 --- a/man/etox_basic.Rd +++ b/man/etox_basic.Rd @@ -35,7 +35,7 @@ out <- etox_basic(ids) out # extract ec numbers -sapply(out, function(y) y$ec) +sapply(out, function(y) y$cas) } } \references{ diff --git a/man/nist_ri.Rd b/man/nist_ri.Rd index 5d192a0c..7376af73 100644 --- a/man/nist_ri.Rd +++ b/man/nist_ri.Rd @@ -10,7 +10,8 @@ nist_ri( type = c("kovats", "linear", "alkane", "lee"), polarity = c("polar", "non-polar"), temp_prog = c("isothermal", "ramp", "custom"), - cas = NULL + cas = NULL, + verbose = TRUE ) } \arguments{ @@ -33,6 +34,8 @@ to get RIs calculated for polar or non-polar columns.} \code{"ramp"}, or \code{"custom"}.} \item{cas}{deprecated. Use \code{query} instead.} + +\item{verbose}{logical; should a verbose output be printed on the console?} } \value{ returns a tibble of literature RIs with the following columns: diff --git a/man/pc_sect.Rd b/man/pc_sect.Rd index fa9ed1fe..bbeaf546 100644 --- a/man/pc_sect.Rd +++ b/man/pc_sect.Rd @@ -63,7 +63,6 @@ pc_sect(2231, "depositor-supplied synonyms", "substance") pc_sect(780286, "modify date", "assay") pc_sect(9023, "Ensembl ID", "gene") pc_sect("1ZHY_A", "Sequence", "protein") -pc_sect("US2013040379", "Patent Identifier Synonyms", "patent") } } \references{ diff --git a/man/srs_query.Rd b/man/srs_query.Rd index 8708b326..c43e9884 100644 --- a/man/srs_query.Rd +++ b/man/srs_query.Rd @@ -4,7 +4,12 @@ \alias{srs_query} \title{Get record details from U.S. EPA Substance Registry Servives (SRS)} \usage{ -srs_query(query, from = c("itn", "cas", "epaid", "tsn", "name"), ...) +srs_query( + query, + from = c("itn", "cas", "epaid", "tsn", "name"), + verbose = TRUE, + ... +) } \arguments{ \item{query}{character; query ID.} @@ -12,6 +17,8 @@ srs_query(query, from = c("itn", "cas", "epaid", "tsn", "name"), ...) \item{from}{character; type of query ID, e.g. \code{'itn'} , \code{'cas'}, \code{'epaid'}, \code{'tsn'}, \code{'name'}.} +\item{verbose}{logical; should a verbose output be printed on the console?} + \item{...}{not currently used.} } \value{ diff --git a/tests/testthat/test-cts.R b/tests/testthat/test-cts.R index 2f513ec0..38ba5b2e 100644 --- a/tests/testthat/test-cts.R +++ b/tests/testthat/test-cts.R @@ -4,12 +4,12 @@ test_that("cts_compinfo()", { skip_if_not(up, "CTS service down") - expect_error(cts_compinfo('xxx')) + expect_true(is.na(cts_compinfo("xxx"))) o1 <- suppressWarnings(cts_compinfo("XEFQLINVKFYRCS-UHFFFAOYSA-N", verbose = FALSE)) o2 <- suppressWarnings(cts_compinfo(c("XEFQLINVKFYRCS-UHFFFAOYSA-N", "XEFQLINVKFYRCS-UHFFFAOYSA-X"), verbose = FALSE)) expect_equal(suppressWarnings(cts_compinfo("XEFQLINVKFYRCS-UHFFFAOYSA-X", verbose = FALSE))[[1]], NA) - expect_warning(cts_compinfo("XEFQLINVKFYRCS-UHFFFAOYSA-X", verbose = FALSE)) + expect_true(is.na(cts_compinfo("XEFQLINVKFYRCS-UHFFFAOYSA-X"))) expect_length(o1[[1]], 10) expect_equal(round(o1[[1]][["molweight"]], 3), 289.542) expect_length(o2, 2) @@ -24,7 +24,7 @@ test_that("cts_convert()", { comp <- c('Triclosan', 'Hexane') expect_error(cts_convert(comp, c('Chemical Name', 'CAS'), 'CAS')) expect_error(cts_convert('Triclosan', 'CAS')) - expect_true(is.na(suppressWarnings(cts_convert('xxxx', 'Chemical Name', 'inchikey'))[[1]])) + expect_true(is.na(cts_convert('xxxx', 'Chemical Name', 'inchikey'))) o1 <- cts_convert(comp, 'Chemical Name', 'inchikey', match = "first", verbose = FALSE) expect_length(o1, 2) diff --git a/tests/testthat/test-etox.R b/tests/testthat/test-etox.R index 4cd1f300..bd640913 100644 --- a/tests/testthat/test-etox.R +++ b/tests/testthat/test-etox.R @@ -155,11 +155,6 @@ test_that("etox integration tests", { ids_b <- get_etoxid(comps, match = 'best') ids_a <- get_etoxid(comps, match = 'all') - # etox_*() can handle only vector inputs - expect_error(etox_basic(ids_b)) - expect_error(etox_targets(ids_b)) - expect_error(etox_tests(ids_b)) - int1 <- etox_basic(ids_b$etoxid) int2 <- etox_targets(ids_b$etoxid) int3 <- etox_tests(ids_b$etoxid) diff --git a/tests/testthat/test-flavornet.R b/tests/testthat/test-flavornet.R index 8af1e81a..acca5d91 100644 --- a/tests/testthat/test-flavornet.R +++ b/tests/testthat/test-flavornet.R @@ -19,6 +19,4 @@ test_that("fn_percept()", { expect_equal(b, structure(c("pungent, ether", "cocoa, roasted nut, roast beef, medicine" ), .Names = c("75-07-0", "123-32-0"))) expect_true(is.na(c[[3]])) - - expect_warning(fn_percept('xxxx')) }) diff --git a/tests/testthat/test-nist.R b/tests/testthat/test-nist.R index 6b488811..366f8213 100644 --- a/tests/testthat/test-nist.R +++ b/tests/testthat/test-nist.R @@ -9,19 +9,6 @@ test_that("NIST webbook is still OK with being scraped", { ) }) -test_that("nist_ri() warns when no results", { - skip_on_cran() - skip_if_not(up, "NIST Web Book is down") - - expect_warning(nist_ri( - "78-70-6", - from = "cas", - type = "linear", - polarity = "non-polar" - ), - regexp = "There are no RIs for 78-70-6") -}) - test_that("nist_ri() works when only one row of data", { skip_on_cran() skip_if_not(up, "NIST Web Book is down") @@ -110,25 +97,6 @@ test_that("nist_ri() works with multiple queries", { expect_equivalent(unique(myRIs$query), c("78-70-6", "13474-59-4")) }) -test_that("nist_ri() warns when multiple results", { - skip_on_cran() - skip_if_not(up, "NIST Web Book is down") - - expect_warning( - nist_ri("Longipinene", from = "name"), - "More than one match for 'Longipinene'. Returning NA.") -}) - -test_that("nist_ri() warns when no chromatography data", { - skip_on_cran() - skip_if_not(up, "NIST Web Book is down") - - expect_warning( - nist_ri("methane", from = "name"), - "There are no chromatography data for 'methane'. Returning NA." - ) -}) - test_that("cas = is deprecated gently", { skip_on_cran() skip_if_not(up, "NIST Web Book is down") diff --git a/tests/testthat/test-pubchem.R b/tests/testthat/test-pubchem.R index ee983007..1f5fa0a4 100644 --- a/tests/testthat/test-pubchem.R +++ b/tests/testthat/test-pubchem.R @@ -149,13 +149,12 @@ test_that("pc_sect()", { a <- pc_sect(c(311, 176, 1118, "balloon", NA), "pKa") expect_s3_class(a, c("tbl_df", "tbl", "data.frame")) - expect_equal(names(a), c("CID", "Name", "Result", "SourceName", "SourceID")) - expect_equal(a$CID, c("311", "176", "1118", "balloon", NA)) - expect_equal(a$Name, c("Citric acid", "Acetic acid", NA, NA, NA)) - expect_equal(a$Result, c("2.79", "4.76 (at 25 °C)", NA, NA, NA)) - expect_equal(a$SourceName, c("DrugBank", "DrugBank", NA, NA, NA)) - expect_equal(a$SourceID, c("DB04272", "DB03166", NA, NA, NA)) - + expect_equal(mean(c("Citric acid", "Acetic acid", NA) %in% a$Name), 1) + expect_equal(mean(c("2.79", "pKa3 6.43 (25 °)", "4.76 (at 25 °C)", + "pKa 4.78 (25 °)", NA) %in% a$Result), 1) + expect_equal(mean(c("DrugBank", "FooDB", NA) %in% a$SourceName), 1) + expect_equal(mean(c("DB04272", "FDB012586", "DB03166","FDB008299", NA) %in% + a$SourceID), 1) b <- pc_sect(2231, "depositor-supplied synonyms", "substance") expect_s3_class(b, c("tbl_df", "tbl", "data.frame")) expect_equal(names(b), c("SID", "Name", "Result", "SourceName", "SourceID")) @@ -172,11 +171,4 @@ test_that("pc_sect()", { expect_equal(names(d), c("pdbID", "Name", "Result", "SourceName", "SourceID")) expect_equivalent(d$Result[1], ">pdb|1ZHY|A Chain A, 1 Kes1 Protein (Run BLAST)") - - e <- pc_sect("US2013040379", "Patent Identifier Synonyms", "patent") - expect_s3_class(e, c("tbl_df", "tbl", "data.frame")) - expect_equal(names(e), c("PatentID", "Name", "Result", "SourceName", - "SourceID")) - expect_equivalent(e$Result, c("US20130040379", "US20130040379A1", - "US2013040379A1")) -}) + }) diff --git a/tests/testthat/test-srs.R b/tests/testthat/test-srs.R new file mode 100644 index 00000000..77e2cb04 --- /dev/null +++ b/tests/testthat/test-srs.R @@ -0,0 +1,24 @@ +up <- ping_service("srs") + +test_that("SRS returns correct results", { + skip_on_cran() + skip_if_not(up, "SRS is down") + + a <- srs_query(NA) + b <- srs_query("balloon") + c <- srs_query("50-00-0", from = "cas") + d <- srs_query("aniline", from = "name") + e <- srs_query(c("50-00-0", "balloon", NA), from = "cas") + + expect_true(is.na(a)) + expect_true(is.na(b)) + expect_is(c, "list") + expect_is(c$`50-00-0`, "data.frame") + expect_equal(c$`50-00-0`$systematicName, "Formaldehyde") + + expect_equal(d$aniline$systematicName, "Benzenamine") + + expect_equal(length(e), 3) + expect_true(is.na(e$`balloon`)) + expect_true(is.na(e$`NA`)) +})