Skip to content

Commit

Permalink
Merge pull request #42 from nhs-r-community/41-link-apis
Browse files Browse the repository at this point in the history
41 link apis
  • Loading branch information
Lextuga007 authored Mar 20, 2024
2 parents 11350e8 + 3593740 commit b263dfa
Show file tree
Hide file tree
Showing 10 changed files with 158 additions and 133 deletions.
2 changes: 0 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,4 @@ export(api_url)
export(get_data)
export(is_lsoa)
export(is_postcode)
export(pull_table_data)
export(retrieve_data)
importFrom(magrittr,"%>%")
File renamed without changes.
88 changes: 88 additions & 0 deletions R/get_data-helper.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#' Getting data from the IMD api
#'
#' @description
#' Only required for IMD as postcodes is routed through the {NHSRpostcodetools}
#' package
#'
#' @return dataset
#' @export
api_url <- function() {
httr2::request(paste0(
"https://services3.arcgis.com/ivmBBrHfQfDnDf8Q/arcgis/rest/services/",
"Indices_of_Multiple_Deprivation_(IMD)_2019/FeatureServer/0/query"
)) |>
httr2::req_url_query(f = "json")
}

#' use batched IDs to retrieve table data
#'
#' @param req used in function \code{\link{imd_api}}
#' @param ids_vec used in function \code{\link{imd_api}}
#'
#' @return function
#' @noRd
retrieve_data <- function(req, ids_vec) {
ids <- stringr::str_flatten(ids_vec, collapse = ",")
req |>
httr2::req_url_query(objectIds = ids) |>
httr2::req_url_query(outFields = "*") |> # returns all columns
httr2::req_url_query(returnGeometry = FALSE) |> # superfluous tbf
httr2::req_retry(max_tries = 3) |> # shouldn't be needed
httr2::req_perform()
}


#' pull actual data out from API JSON response
#'
#' @param respused in function \code{\link{imd_api}}
#'
#' @return function
#' @noRd
pull_table_data <- function(resp) {
resp |>
httr2::resp_check_status() |>
httr2::resp_body_json() |>
purrr::pluck("features") |>
purrr::map_df("attributes") |>
janitor::clean_names()
}

#' Get IMD data through API
#'
#' @description
#' Relies on functions \code{\link{retrieve_data}},
#' \code{\link{pull_table_data}} and \code{\link{api_url}}
#'
#' @param text String. Used in the query function and feeds in either postcodes
#' or lsoas from data in the expected API url format
#' @param req
#'
#' @return data frame
#' @noRd
imd_api <- function(text, req) {
ids <- req |>
httr2::req_url_query(returnIdsonly = TRUE) |>
httr2::req_url_query(where = text) |>
httr2::req_perform() |>
httr2::resp_body_json() |>
purrr::pluck("objectIds")

ids_batched <- NHSRpostcodetools::batch_it(ids, 100L)

# Uses function retrieve data
# safely handle any errors
poss_retrieve_data <- purrr::possibly(retrieve_data)

resps <- ids_batched |>
purrr::map(\(x) poss_retrieve_data(req, x)) |>
purrr::compact()

# Uses function pull_table_data
poss_pull_table_data <- purrr::possibly(pull_table_data)

data_out <- resps |>
purrr::map(poss_pull_table_data) |>
purrr::list_rbind()

data_out
}
141 changes: 50 additions & 91 deletions R/get_data.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,3 @@
#' Getting data from the IMD api
#'
#' @description
#' Only required for IMD as postcodes is routed through the {NHSRpostcodetools}
#' package
#'
#' @return dataset
#' @export
api_url <- function() {
httr2::request(paste0(
"https://services3.arcgis.com/ivmBBrHfQfDnDf8Q/arcgis/rest/services/",
"Indices_of_Multiple_Deprivation_(IMD)_2019/FeatureServer/0/query"
)) |>
httr2::req_url_query(f = "json")
}

#' Query information to restrict data returned
#'
#' @description
Expand Down Expand Up @@ -53,13 +37,14 @@ get_data <- function(data,
column <- rlang::as_string(column)

# Check there is corresponding type data somewhere in data frame
# Use this to allow for other column names to be used in later code
is_postcode_check <- sum(is_postcode(as.vector(t(data))), na.rm = TRUE)
is_lsoa_check <- sum(is_lsoa(as.vector(t(data))), na.rm = TRUE)

if (column == "default" && is_postcode_check == 0 && is_lsoa_check > 0) {
column <- "lsoa11"
} else if (column == "default" && url_type == "postcode") {
if ("postcode" %in% colnames(data)) {
column <- "postcode"
} else if ("lsoa11" %in% colnames(data)) {
column <- "lsoa11"
} else {
column <- rlang::eval_tidy(rlang::quo(column))
}
Expand All @@ -78,7 +63,11 @@ get_data <- function(data,
fix_invalid = fix_invalid,
var = column # Not required but doesn't cause error
)
} else if (is.atomic(data) && is_postcode_check == 0 &&
}

## Generate specific text for the url

if (is.atomic(data) && is_postcode_check == 0 &&
is_lsoa_check > 0) {
text <- paste0(
"LSOA11CD IN ('",
Expand All @@ -96,34 +85,13 @@ get_data <- function(data,
collapse = "', '"
), "')"
)
} else {
data
}

if (is_postcode_check == 0 && is_lsoa_check > 0 | url_type == "imd") {
ids <- req |>
httr2::req_url_query(returnIdsonly = TRUE) |>
httr2::req_url_query(where = text) |>
httr2::req_perform() |>
httr2::resp_body_json() |>
purrr::pluck("objectIds")

ids_batched <- NHSRpostcodetools::batch_it(ids, 100L)

# Uses function retrieve data
# safely handle any errors
poss_retrieve_data <- purrr::possibly(retrieve_data)

resps <- ids_batched |>
purrr::map(\(x) poss_retrieve_data(req, x)) |>
purrr::compact()

# Uses function pull_table_data
poss_pull_table_data <- purrr::possibly(pull_table_data)

data_out <- resps |>
purrr::map(poss_pull_table_data) |>
purrr::list_rbind()
} else if (exists("data_transformed") && url_type == "imd") {
text <- paste0(
"LSOA11CD IN ('",
paste(data_transformed$lsoa_code,
collapse = "', '"
), "')"
)
}

# Because APIs only return data where a match has been made which results in
Expand All @@ -132,64 +100,55 @@ get_data <- function(data,
# this but IMD is handled here.

if (exists("data_transformed") && is.data.frame(data)) {
data |>
pc_data <- data |>
dplyr::left_join(
data_transformed
)
} else if (exists("data_transformed") && is.atomic(data)) {
tibble::as_tibble(data) |>
dplyr::left_join(
data_transformed,
dplyr::join_by(value == postcode)
) |>
dplyr::rename(postcode = value)
} else if (is.data.frame(data)) {
data |>
pc_data <- data_transformed
}


## IMD data

if (is_postcode_check == 0 && is_lsoa_check > 0 &&
is.data.frame(data)) {
data_out <- imd_api(
text = text,
req = req
)

imd_data <- data |>
dplyr::left_join(
data_out,
dplyr::join_by({{ column }} == lsoa11cd)
)
} else if (is.atomic(data)) {
tibble::as_tibble(data) |>
} else if (is_postcode_check == 0 && is_lsoa_check > 0 && is.atomic(data)) {
data_out <- imd_api(
text = text,
req = req
)

imd_data <- tibble::as_tibble(data) |>
dplyr::left_join(
data_out,
dplyr::join_by(value == lsoa11cd)
) |>
dplyr::rename(lsoa11 = value)
} else {
data
}
}

#' use batched IDs to retrieve table data
#'
#' @param req used in function \code{\link{get_data}}
#' @param ids_vec used in function \code{\link{get_data}}
#'
#' @return function
#' @export
retrieve_data <- function(req, ids_vec) {
ids <- stringr::str_flatten(ids_vec, collapse = ",")
req |>
httr2::req_url_query(objectIds = ids) |>
httr2::req_url_query(outFields = "*") |> # returns all columns
httr2::req_url_query(returnGeometry = FALSE) |> # superfluous tbf
httr2::req_retry(max_tries = 3) |> # shouldn't be needed
httr2::req_perform()
}
## Final data

if (exists("pc_data") && url_type == "imd") {
data_out <- imd_api(
text = text,
req = req
)

#' pull actual data out from API JSON response
#'
#' @param respused in function \code{\link{get_data}}
#'
#' @return function
#' @export
pull_table_data <- function(resp) {
resp |>
httr2::resp_check_status() |>
httr2::resp_body_json() |>
purrr::pluck("features") |>
purrr::map_df("attributes") |>
janitor::clean_names()
data_out
} else if (exists("pc_data") && url_type == "postcode") {
pc_data
} else {
imd_data
}
}
2 changes: 1 addition & 1 deletion man/api_url.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/is_lsoa.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/is_postcode.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 0 additions & 17 deletions man/pull_table_data.Rd

This file was deleted.

19 changes: 0 additions & 19 deletions man/retrieve_data.Rd

This file was deleted.

18 changes: 17 additions & 1 deletion tests/testthat/test-get_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

postcodes <- c("HD1 2UT", "HD1 2UU", "HD1 2UV")

imd <- c("E01011107", "E01011229", "E01000002")
imd <- c("E01011107", "E01011229", "E01002")

# # Taken from
# # www.gov.uk/government/statistics/english-indices-of-deprivation-2019
Expand Down Expand Up @@ -216,8 +216,24 @@ httptest2::with_mock_dir("imd", {
ncol(get_data(lsoa_df1, url_type = "imd")), n_col_df
)

testthat::expect_equal(
nrow(get_data(lsoa_df1)), n_rows
)

testthat::expect_equal(
ncol(get_data(lsoa_df1)), n_col_df
)

# vectors

testthat::expect_equal(
nrow(get_data(imd)), n_rows
)

testthat::expect_equal(
ncol(get_data(imd)), n_col_vector
)

testthat::expect_equal(
nrow(get_data(imd, url_type = "imd")), n_rows
)
Expand Down

0 comments on commit b263dfa

Please sign in to comment.