Skip to content

Commit

Permalink
Merge pull request #9 from WorldHealthOrganization/feature_dev
Browse files Browse the repository at this point in the history
Feature dev
  • Loading branch information
truenomad authored Apr 29, 2024
2 parents 5f376cf + 35c4401 commit cd65a3d
Show file tree
Hide file tree
Showing 9 changed files with 235 additions and 47 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ Imports:
jsonlite,
tidyselect,
httpcode,
tibble
tibble,
pbmcapply
Suggests:
testthat,
withr,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Generated by roxygen2: do not edit by hand

export(get_multi_ona_data)
export(get_ona_data)
export(prep_geonames)
export(prep_match_datatypes)
export(prep_match_names)
export(prep_ona_data_endpoints)
export(read)
export(save)
importFrom(cli,cli_alert_warning)
Expand Down
129 changes: 121 additions & 8 deletions R/get_ona.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,59 @@ check_status_api <- function(response) {
}
}

#' Fetch ONA API Data
#'
#' Retrieves data from the ONA API endpoints to display available forms and
#' datasets. It initially, uses given API token for authorization and checks the
#' initial API response status. Then it validates the base URL and constructs
#' the full API endpoint URL to retrieve a dataframe of all the available for
#' forms/data.
#'
#' @param base_url The base URL for the ONA API; defaults to
#' 'https://api.whonghub.org'.
#' @param api_token API token for authentication.
#'
#' @return Data frame of the API endpoint data.
#'
#' @export
#' @examples
#' # prep_ona_data_endpoints(api_token = "your_api_token_here")
prep_ona_data_endpoints <- function(
base_url = "https://api.whonghub.org", api_token) {

# check base url validity
base_url_pattern <- "^(https?://[^/]+).*"
if ( grepl(base_url_pattern, base_url)){
base_url <- sub(base_url_pattern, "\\1", base_url)
} else {
stop(paste("Error: ", base_url, " is not a valid base url"))
}

# set up URL
api_url <- paste0(base_url,"/api/v1/data")

# Validate base url link first -----------------------------------------------

# get head before download
response <- httr::HEAD(
base_url,
config = httr::add_headers(Authorization = paste("Token", api_token)))

# check status of call
check_status_api(response)

# Validate url link before downloading ---------------------------------------

# get one data endpoint df
response <- httr::GET(
api_url,
config = httr::add_headers(Authorization = paste("Token", api_token))) |>
httr::content("text", encoding = "UTF-8") |>
jsonlite::fromJSON(simplifyDataFrame = TRUE)

return(response)
}

#' Get a Page of Data from an API
#'
#' This function retrieves a single page of data from a specified API endpoint.
Expand Down Expand Up @@ -118,15 +171,20 @@ get_ona_data <- function(
get_next_page <- TRUE
start <- 0

# Validate url link before downloading ---------------------------------------

# get head before download
response <- httr::HEAD(
api_url,
config = httr::add_headers(Authorization = paste("Token", api_token)))
# Check if the form id is available for download -----------------------------

# check status of call
check_status_api(response)
resp_data <- prep_ona_data_endpoints(
base_url = base_url,
api_token = api_token)

if (!(form_id %in% unique(resp_data$id))) {
cli::cli_abort(
paste0("Form IDs ",
toString(form_id),
" not found. Use `prep_ona_data_endpoints()` ",
"to check available forms for download.")
)
}

# Download data (use pagination if necessary) --------------------------------

Expand Down Expand Up @@ -172,3 +230,58 @@ get_ona_data <- function(

return(results)
}

#' Get Data from ONA for Multiple Forms
#'
#' This function retrieves data for a specified form from the ONA API using a
#' provided API token and constructs a unique key for each dataset. It returns
#' the data in a structured format if the request is successful.
#'
#' @param base_url The base URL for the ONA API; defaults to
#' 'https://api.whonghub.org'.
#' @param form_ids A vector containing form id number to identify each form.
#' @param api_token A string specifying the API token for ONA.
#'
#' @return A data frame containing the combined data from all specified form
#' IDs, and includes from_id column.
#' @examples
#' # api_token <- "your_api_token_here"
#' # data <- get_multi_ona_data(form_ids = c(623, 432, 643), api_token)
#' @export
get_multi_ona_data <- function(
base_url = "https://api.whonghub.org", form_ids, api_token) {

# Check if the form IDs are available for download ---------------------------
resp_data <- prep_ona_data_endpoints(
base_url = base_url,
api_token = api_token
)

if (!all(form_ids %in% unique(resp_data$id))) {
missing_ids <- form_ids[!form_ids %in% unique(resp_data$id)]
cli::cli_abort(
paste0("Form IDs ",
toString(missing_ids),
" not found. Use `prep_ona_data_endpoints()` ",
"to check available forms for download.")
)
}

# Fetch data in parallel for each form ID ------------------------------------
results_list <- pbmcapply::pbmclapply(
form_ids,
function(form_id) {
get_ona_data(form_id = form_id, api_token = api_token)
},
mc.cores = parallel::detectCores() - 1
)

# Set names for each element in the results list to match form_ids
names(results_list) <- as.character(form_ids)

# Combine all into one df
combined_data <- dplyr::bind_rows(results_list, .id = "form_id")

return(combined_data)
}

13 changes: 10 additions & 3 deletions R/prep_match_datatypes.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,20 @@ prep_match_datatypes <- function(ref_dataframe, target_dataframe) {
target_dataframe <- target_dataframe |>
dplyr::mutate(!!col := as.logical(!!rlang::sym(col)))
} else if (col_type == "Date") {

target_dataframe <- target_dataframe |>
dplyr::mutate(!!col := as.Date(!!rlang::sym(col)
# format = "Y-%m-%d"
))
}
} #else if (col_type == 'c("POSIXct", "POSIXt")') {
# target_dataframe <- target_dataframe |>
# dplyr::mutate(!!col == as.POSIXct(!!rlang::sym(col),
# format = "%Y-%m-%d"))
# } else if (col_type == 'c("POSIXlt", "POSIXt")') {
# target_dataframe <- target_dataframe |>
# dplyr::mutate(!!col == as.POSIXlt(!!rlang::sym(col),
# format = "%Y-%m-%d"))
# }
}
}
return(target_dataframe)
}

14 changes: 7 additions & 7 deletions R/prep_match_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,23 +73,23 @@ prep_match_names <- function(ref_dataframe, target_dataframe, report = TRUE) {
setdiff(names(target_dataframe), names(ref_dataframe))
if (length(no_match_cols) > 0) {
print(
paste(
"No match found for ",
no_match_cols,
". initial name maintained. A total of ",
length(names(no_match_cols)),
"/",
length(names(no_match_cols)),
" columns from the target dataframe were matched to the reference dataframe"
)
)
print(
"No match found for ",
no_match_cols,
". initial name maintained. A total of "
)
} else{
print(
paste(
"All columns were successfully matched and renamed. A total of ",
length(names(common_cols)),
length(common_cols),
"/",
length(names(common_cols)),
length(common_cols),
" columns from the target dataframe were matched to the reference dataframe"
)
)
Expand Down
29 changes: 29 additions & 0 deletions man/get_multi_ona_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/prep_ona_data_endpoints.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 34 additions & 28 deletions tests/testthat/test-prep_match_datatypes.R
Original file line number Diff line number Diff line change
@@ -1,30 +1,36 @@
testthat::test_that("Data types match correctly", {

# set up sample ref target data
ref_df <- tibble::tibble(
integer_col = 1:3,
character_col = c("a", "b", "c"),
numeric_col = c(1.1, 2.2, 3.3),
date_col = as.Date(
c("2021-01-01", "2021-01-02", "2021-01-03"))
)

# set up sample target data
target_df <- tibble::tibble(
integer_col = c("1", "2", "3"),
character_col = 1:3,
numeric_col = c("1.1", "2.2", "3.3"),
date_col = c("2021-01-01", "2021-01-02", "2021-01-03")
)

# match datatypes
matched_df <- prep_match_datatypes(ref_df, target_df)

# check if prep_match_datatypes works
testthat::expect_type(matched_df$integer_col, "integer")
testthat::expect_type(matched_df$character_col, "character")
testthat::expect_type(matched_df$numeric_col, "double")
testthat::expect_equal(class(matched_df$date_col), "Date")
# set up sample ref target data
ref_df <- tibble::tibble(
integer_col = 1:3,
character_col = c("a", "b", "c"),
numeric_col = c(1.1, 2.2, 3.3),
date_col = as.Date(c(
"2021-01-01", "2021-01-02", "2021-01-03"
)),
posixct_date_col = as.POSIXct(c("2024-04-17",
"2024-04-18",
"2024-04-19"),
format = "%Y-%m-%d")
)

# set up sample target data
target_df <- tibble::tibble(
integer_col = c("1", "2", "3"),
character_col = 1:3,
numeric_col = c("1.1", "2.2", "3.3"),
date_col = c("2021-01-01", "2021-01-02", "2021-01-03"),
posixct_date_col = c("2024-04-17",
"2024-04-18",
"2024-04-19")
)

# match datatypes
matched_df <- prep_match_datatypes(ref_df, target_df)

# check if prep_match_datatypes works
testthat::expect_type(matched_df$integer_col, "integer")
testthat::expect_type(matched_df$character_col, "character")
testthat::expect_type(matched_df$numeric_col, "double")
testthat::expect_equal(class(matched_df$date_col), "Date")
#testthat::expect_equal(class(matched_df$posixct_date_col), 'c("POSIXct", "POSIXt")')
})


3 changes: 3 additions & 0 deletions tests/testthat/test-prep_ona_data_endpoints.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
test_that("multiplication works", {
expect_equal(2 * 2, 4)
})

0 comments on commit cd65a3d

Please sign in to comment.