Skip to content

Commit

Permalink
Transition from httr to httr2 (#198)
Browse files Browse the repository at this point in the history
* move to httr2

* don't need that snapshot test in CI

* change download_hydat

* webservice

* adjust some messages

* add tidyhydat_perform

* drop from imports

* use for stations

* drop from csv

* all time

* last one?

* wip error handling

* bit more clean up

* style

* one more skip

* set min version of httr2

* bump R version
  • Loading branch information
boshek authored Oct 4, 2024
1 parent f781dbb commit f72aab9
Show file tree
Hide file tree
Showing 12 changed files with 178 additions and 181 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ License: Apache License (== 2.0) | file LICENSE
URL: https://docs.ropensci.org/tidyhydat/, https://github.com/ropensci/tidyhydat/
BugReports: https://github.com/ropensci/tidyhydat/issues/
Depends:
R (>= 3.4.0)
R (>= 4.0.0)
Imports:
cli (>= 1.0.0),
crayon (>= 1.3.4),
DBI (>= 0.7),
dbplyr (>= 1.1.0),
dplyr (>= 0.7.4),
httr (>= 1.3.1),
httr2 (>= 1.0.0),
lubridate (>= 1.6.0),
rappdirs (>= 0.3.1),
readr (>= 1.1.1),
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# tidyhydat 0.6.2
- bump minimum R version to 4.0.0
- dropped httr in favour of httr2
- fix bug where `download_hydat()` fails if `tempdir()` is on a different device than `hydat_path` (@mpdavison, #192)
- fix bug where `download_hydat()` wasn't checking alternative paths for success (@Travis-Simmons)

Expand Down
36 changes: 21 additions & 15 deletions R/download.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,15 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) {
new_hydat <- hy_remote()
# Make the download URL
url <- paste0(hy_base_url(), "Hydat_sqlite3_", new_hydat, ".zip")
response <- httr::HEAD(url)
httr::stop_for_status(response)
size <- round(as.numeric(httr::headers(response)[["Content-Length"]]) / 1000000, 0)
req <- httr2::request(url)
req <- httr2::req_method(req, "HEAD")
req <- tidyhydat_agent(req)
req <- tidyhydat_perform(req)
httr2::resp_check_status(req)

size <- round(as.numeric(
httr2::resp_header(req, "Content-Length")
) / 1000000, 0)


## Do we need to download a new version?
Expand All @@ -77,11 +83,10 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) {
if (!dl_overwrite) {
info("HYDAT is updated on a quarterly basis, check again soon for an updated version.")
}

if (new_hydat != existing_hydat & ask) { # New DB available or no local DB at all
msg <- paste0(
"Downloading HYDAT will take up to 10 minutes (",
size, " MB). \nThis will remove any older versions of HYDAT, if applicable. \nIs that okay?"
"This version of HYDAT is ", size, "MB in size and will take some time to download.
\nThis will remove any older versions of HYDAT, if applicable. \nIs that okay?"
)
ans <- ask(msg)
} else {
Expand All @@ -106,12 +111,10 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) {
tmp <- tempfile("hydat_", fileext = ".zip")

## Download the zip file
res <- httr::GET(
url, httr::write_disk(tmp), httr::progress("down"),
httr::user_agent("https://github.com/ropensci/tidyhydat")
)
on.exit(file.remove(tmp), add = TRUE)
httr::stop_for_status(res)
hydb_req <- httr2::request(url)
hydb_req <- tidyhydat_agent(hydb_req)
resp <- tidyhydat_perform(hydb_req, path = tmp)
httr2::resp_check_status(resp)

## Extract the file to a temporary dir
if (file.exists(tmp)) info("Extracting HYDAT")
Expand Down Expand Up @@ -153,10 +156,13 @@ hy_remote <- function() {
# Run network check
network_check(hy_base_url())

x <- httr::GET(hy_base_url())
httr::stop_for_status(x)
req <- httr2::request(hy_base_url())
req <- tidyhydat_perform(req)
resp <- httr2::resp_check_status(req)


raw_date <- substr(
gsub("^.*\\Hydat_sqlite3_", "", httr::content(x, "text")),
gsub("^.*\\Hydat_sqlite3_", "", httr2::resp_body_string(req)),
1, 8
)

Expand Down
72 changes: 43 additions & 29 deletions R/realtime-webservice.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,10 @@
#' @export


realtime_ws <- function(station_number, parameters = NULL,
start_date = Sys.Date() - 30, end_date = Sys.Date()) {
if (is_mac()) {
# temporary patch to work around vroom 1.6.4 bug
readr::local_edition(1)
}
realtime_ws <- function(station_number,
parameters = NULL,
start_date = Sys.Date() - 30,
end_date = Sys.Date()) {

if (is.null(parameters)) parameters <- c(46, 16, 52, 47, 8, 5, 41, 18)

Expand All @@ -92,17 +90,26 @@ realtime_ws <- function(station_number, parameters = NULL,


if (!grepl("[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]", start_date)) {
stop("Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats", call. = FALSE)
stop(
"Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats",
call. = FALSE
)
}

if (!grepl("[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]", end_date)) {
stop("Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats", call. = FALSE)
stop(
"Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats",
call. = FALSE
)
}


if (!is.null(start_date) & !is.null(end_date)) {
if (lubridate::ymd_hms(end_date) < lubridate::ymd_hms(start_date)) {
stop("start_date is after end_date. Try swapping values.", call. = FALSE)
stop(
"start_date is after end_date. Try swapping values.",
call. = FALSE
)
}
}

Expand All @@ -113,65 +120,72 @@ realtime_ws <- function(station_number, parameters = NULL,

## Build link for GET
baseurl <- "https://wateroffice.ec.gc.ca/services/real_time_data/csv/inline?"


station_string <- paste0("stations[]=", station_number, collapse = "&")
parameters_string <- paste0("parameters[]=", parameters, collapse = "&")
date_string <- paste0("start_date=", substr(start_date, 1, 10), "%20", substr(start_date, 12, 19),
"&end_date=", substr(end_date, 1, 10), "%20", substr(end_date, 12, 19))
date_string <- paste0(
"start_date=", substr(start_date, 1, 10), "%20", substr(start_date, 12, 19),
"&end_date=", substr(end_date, 1, 10), "%20", substr(end_date, 12, 19)
)

## paste them all together
url_for_GET <- paste0(
query_url <- paste0(
baseurl,
station_string, "&",
parameters_string, "&",
date_string
)

## Get data
get_ws <- httr::GET(url_for_GET, httr::user_agent("https://github.com/ropensci/tidyhydat"))
req <- httr2::request(query_url)
req <- tidyhydat_agent(req)
resp <- httr2::req_perform(req)

## Give webservice some time
Sys.sleep(1)


## Check the GET status
httr::stop_for_status(get_ws)

if (httr::headers(get_ws)$`content-type` != "text/csv; charset=utf-8") {
stop("GET response is not a csv file")
## Check the respstatus
httr2::resp_check_status(resp)


if (httr2::resp_headers(resp)$`Content-Type` != "text/csv; charset=utf-8") {
stop("Response is not a csv file")
}

## Turn it into a tibble and specify correct column classes
csv_df <- httr::content(
get_ws,
type = "text/csv",
encoding = "UTF-8",
csv_df <- readr::read_csv(
httr2::resp_body_string(resp),
col_types = "cTidccc"
)
)


## Check here to see if csv_df has any data in it
if (nrow(csv_df) == 0) {
stop("No data exists for this station query")
}

## Rename columns to reflect tidyhydat naming
colnames(csv_df) <- c("STATION_NUMBER","Date","Parameter","Value","Grade","Symbol","Approval")
colnames(csv_df) <- c("STATION_NUMBER", "Date", "Parameter", "Value", "Grade", "Symbol", "Approval")

csv_df <- dplyr::left_join(
csv_df,
dplyr::select(tidyhydat::param_id, -Name_Fr),
by = c("Parameter")
)
csv_df <- dplyr::select(csv_df, STATION_NUMBER, Date, Name_En, Value, Unit,
Grade, Symbol, Approval, Parameter, Code)
csv_df <- dplyr::select(
csv_df, STATION_NUMBER, Date, Name_En, Value, Unit,
Grade, Symbol, Approval, Parameter, Code
)

## What stations were missed?
differ <- setdiff(unique(station_number), unique(csv_df$STATION_NUMBER))
if (length(differ) != 0) {
if (length(differ) <= 10) {
message("The following station(s) were not retrieved: ", paste0(differ, sep = " "))
message("Check station number for typos or if it is a valid station in the network")
}
else {
} else {
message("More than 10 stations from the initial query were not returned. Ensure realtime and active status are correctly specified.")
}
} else {
Expand All @@ -180,7 +194,7 @@ realtime_ws <- function(station_number, parameters = NULL,

p_differ <- setdiff(unique(parameters), unique(csv_df$Parameter))
if (length(p_differ) != 0) {
message("The following valid parameter(s) were not retrieved for at least one station you requested: ", paste0(p_differ, sep = " "))
message("The following valid parameter(s) were not retrieved for at least one station you requested: ", paste0(p_differ, sep = " "))
} else {
message("All parameters successfully retrieved")
}
Expand Down
22 changes: 5 additions & 17 deletions R/realtime.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,22 +101,10 @@ realtime_stations <- function(prov_terr_state_loc = NULL) {
prov <- prov_terr_state_loc

realtime_link <- "https://dd.weather.gc.ca/hydrometric/doc/hydrometric_StationList.csv"
resp_str <- realtime_parser(realtime_link)

url_check <- httr::GET(realtime_link, httr::user_agent("https://github.com/ropensci/tidyhydat"))

## Checking to make sure the link is valid
if (httr::http_error(url_check) == "TRUE") {
stop(paste0(realtime_link, " is not a valid url. Datamart may be down or the url has changed."))
}

if (is_mac()) {
# temporary patch to work around vroom 1.6.4 bug
readr::local_edition(1)
}

net_tibble <- httr::content(url_check,
type = "text/csv",
encoding = "UTF-8",
net_tibble <- readr::read_csv(
resp_str,
skip = 1,
col_names = c(
"STATION_NUMBER",
Expand All @@ -141,7 +129,7 @@ realtime_stations <- function(prov_terr_state_loc = NULL) {
}


as.realtime(dplyr::filter(net_tibble, PROV_TERR_STATE_LOC %in% prov))
as.realtime(net_tibble[net_tibble$PROV_TERR_STATE_LOC %in% prov, ])
}

#' Add local datetime column to realtime tibble
Expand Down Expand Up @@ -214,4 +202,4 @@ realtime_daily_mean <- function(.data, na.rm = FALSE) {
df_mean <- dplyr::arrange(df_mean, Parameter)

dplyr::ungroup(df_mean)
}
}
4 changes: 2 additions & 2 deletions R/realtime_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ plot.realtime <- function(x = NULL, Parameter = c("Flow", "Level"), ...) {
Parameter <- match.arg(Parameter)

if (length(unique(x$STATION_NUMBER)) > 1L) {
stop("realtime plot methods only work with objects that contain one station", call. = FALSE)
stop("realtime plots only work with objects that contain one station", call. = FALSE)
}

if (is.null(x)) stop("Station(s) not present in the datamart")
if (is.null(x)) stop("Station not present in the datamart")

## Catch mis labelled parameter
if (Parameter == "Level" && ((nrow(x[x$Parameter == "Level", ]) == 0) | all(is.na(x[x$Parameter == "Level", ]$Value)))) {
Expand Down
Loading

0 comments on commit f72aab9

Please sign in to comment.