diff --git a/DESCRIPTION b/DESCRIPTION index 0615c20..fca53a8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,7 +30,8 @@ Imports: jsonlite, tidyselect, httpcode, - tibble + tibble, + pbmcapply Suggests: testthat, withr, diff --git a/NAMESPACE b/NAMESPACE index 3667942..12f757d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,11 @@ # Generated by roxygen2: do not edit by hand +export(get_multi_ona_data) export(get_ona_data) export(prep_geonames) export(prep_match_datatypes) export(prep_match_names) +export(prep_ona_data_endpoints) export(read) export(save) importFrom(cli,cli_alert_warning) diff --git a/R/get_ona.R b/R/get_ona.R index 38f5548..d3243bd 100644 --- a/R/get_ona.R +++ b/R/get_ona.R @@ -32,6 +32,59 @@ check_status_api <- function(response) { } } +#' Fetch ONA API Data +#' +#' Retrieves data from the ONA API endpoints to display available forms and +#' datasets. It initially, uses given API token for authorization and checks the +#' initial API response status. Then it validates the base URL and constructs +#' the full API endpoint URL to retrieve a dataframe of all the available for +#' forms/data. +#' +#' @param base_url The base URL for the ONA API; defaults to +#' 'https://api.whonghub.org'. +#' @param api_token API token for authentication. +#' +#' @return Data frame of the API endpoint data. +#' +#' @export +#' @examples +#' # prep_ona_data_endpoints(api_token = "your_api_token_here") +prep_ona_data_endpoints <- function( + base_url = "https://api.whonghub.org", api_token) { + + # check base url validity + base_url_pattern <- "^(https?://[^/]+).*" + if ( grepl(base_url_pattern, base_url)){ + base_url <- sub(base_url_pattern, "\\1", base_url) + } else { + stop(paste("Error: ", base_url, " is not a valid base url")) + } + + # set up URL + api_url <- paste0(base_url,"/api/v1/data") + + # Validate base url link first ----------------------------------------------- + + # get head before download + response <- httr::HEAD( + base_url, + config = httr::add_headers(Authorization = paste("Token", api_token))) + + # check status of call + check_status_api(response) + + # Validate url link before downloading --------------------------------------- + + # get one data endpoint df + response <- httr::GET( + api_url, + config = httr::add_headers(Authorization = paste("Token", api_token))) |> + httr::content("text", encoding = "UTF-8") |> + jsonlite::fromJSON(simplifyDataFrame = TRUE) + + return(response) +} + #' Get a Page of Data from an API #' #' This function retrieves a single page of data from a specified API endpoint. @@ -118,15 +171,20 @@ get_ona_data <- function( get_next_page <- TRUE start <- 0 - # Validate url link before downloading --------------------------------------- - - # get head before download - response <- httr::HEAD( - api_url, - config = httr::add_headers(Authorization = paste("Token", api_token))) + # Check if the form id is available for download ----------------------------- - # check status of call - check_status_api(response) + resp_data <- prep_ona_data_endpoints( + base_url = base_url, + api_token = api_token) + + if (!(form_id %in% unique(resp_data$id))) { + cli::cli_abort( + paste0("Form IDs ", + toString(form_id), + " not found. Use `prep_ona_data_endpoints()` ", + "to check available forms for download.") + ) + } # Download data (use pagination if necessary) -------------------------------- @@ -172,3 +230,58 @@ get_ona_data <- function( return(results) } + +#' Get Data from ONA for Multiple Forms +#' +#' This function retrieves data for a specified form from the ONA API using a +#' provided API token and constructs a unique key for each dataset. It returns +#' the data in a structured format if the request is successful. +#' +#' @param base_url The base URL for the ONA API; defaults to +#' 'https://api.whonghub.org'. +#' @param form_ids A vector containing form id number to identify each form. +#' @param api_token A string specifying the API token for ONA. +#' +#' @return A data frame containing the combined data from all specified form +#' IDs, and includes from_id column. +#' @examples +#' # api_token <- "your_api_token_here" +#' # data <- get_multi_ona_data(form_ids = c(623, 432, 643), api_token) +#' @export +get_multi_ona_data <- function( + base_url = "https://api.whonghub.org", form_ids, api_token) { + + # Check if the form IDs are available for download --------------------------- + resp_data <- prep_ona_data_endpoints( + base_url = base_url, + api_token = api_token + ) + + if (!all(form_ids %in% unique(resp_data$id))) { + missing_ids <- form_ids[!form_ids %in% unique(resp_data$id)] + cli::cli_abort( + paste0("Form IDs ", + toString(missing_ids), + " not found. Use `prep_ona_data_endpoints()` ", + "to check available forms for download.") + ) + } + + # Fetch data in parallel for each form ID ------------------------------------ + results_list <- pbmcapply::pbmclapply( + form_ids, + function(form_id) { + get_ona_data(form_id = form_id, api_token = api_token) + }, + mc.cores = parallel::detectCores() - 1 + ) + + # Set names for each element in the results list to match form_ids + names(results_list) <- as.character(form_ids) + + # Combine all into one df + combined_data <- dplyr::bind_rows(results_list, .id = "form_id") + + return(combined_data) +} + diff --git a/R/prep_match_datatypes.R b/R/prep_match_datatypes.R index a5e3d52..9c8543a 100644 --- a/R/prep_match_datatypes.R +++ b/R/prep_match_datatypes.R @@ -47,13 +47,20 @@ prep_match_datatypes <- function(ref_dataframe, target_dataframe) { target_dataframe <- target_dataframe |> dplyr::mutate(!!col := as.logical(!!rlang::sym(col))) } else if (col_type == "Date") { - target_dataframe <- target_dataframe |> dplyr::mutate(!!col := as.Date(!!rlang::sym(col) - # format = "Y-%m-%d" )) - } + } #else if (col_type == 'c("POSIXct", "POSIXt")') { + # target_dataframe <- target_dataframe |> + # dplyr::mutate(!!col == as.POSIXct(!!rlang::sym(col), + # format = "%Y-%m-%d")) + # } else if (col_type == 'c("POSIXlt", "POSIXt")') { + # target_dataframe <- target_dataframe |> + # dplyr::mutate(!!col == as.POSIXlt(!!rlang::sym(col), + # format = "%Y-%m-%d")) + # } } } return(target_dataframe) } + diff --git a/R/prep_match_names.R b/R/prep_match_names.R index 140ba05..620e0e1 100644 --- a/R/prep_match_names.R +++ b/R/prep_match_names.R @@ -73,23 +73,23 @@ prep_match_names <- function(ref_dataframe, target_dataframe, report = TRUE) { setdiff(names(target_dataframe), names(ref_dataframe)) if (length(no_match_cols) > 0) { print( - paste( - "No match found for ", - no_match_cols, - ". initial name maintained. A total of ", length(names(no_match_cols)), "/", length(names(no_match_cols)), " columns from the target dataframe were matched to the reference dataframe" ) - ) + print( + "No match found for ", + no_match_cols, + ". initial name maintained. A total of " + ) } else{ print( paste( "All columns were successfully matched and renamed. A total of ", - length(names(common_cols)), + length(common_cols), "/", - length(names(common_cols)), + length(common_cols), " columns from the target dataframe were matched to the reference dataframe" ) ) diff --git a/man/get_multi_ona_data.Rd b/man/get_multi_ona_data.Rd new file mode 100644 index 0000000..fbafe95 --- /dev/null +++ b/man/get_multi_ona_data.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_ona.R +\name{get_multi_ona_data} +\alias{get_multi_ona_data} +\title{Get Data from ONA for Multiple Forms} +\usage{ +get_multi_ona_data(base_url = "https://api.whonghub.org", form_ids, api_token) +} +\arguments{ +\item{base_url}{The base URL for the ONA API; defaults to +'https://api.whonghub.org'.} + +\item{form_ids}{A vector containing form id number to identify each form.} + +\item{api_token}{A string specifying the API token for ONA.} +} +\value{ +A data frame containing the combined data from all specified form +IDs, and includes from_id column. +} +\description{ +This function retrieves data for a specified form from the ONA API using a +provided API token and constructs a unique key for each dataset. It returns +the data in a structured format if the request is successful. +} +\examples{ +# api_token <- "your_api_token_here" +# data <- get_multi_ona_data(form_ids = c(623, 432, 643), api_token) +} diff --git a/man/prep_ona_data_endpoints.Rd b/man/prep_ona_data_endpoints.Rd new file mode 100644 index 0000000..3609190 --- /dev/null +++ b/man/prep_ona_data_endpoints.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_ona.R +\name{prep_ona_data_endpoints} +\alias{prep_ona_data_endpoints} +\title{Fetch ONA API Data} +\usage{ +prep_ona_data_endpoints(base_url = "https://api.whonghub.org", api_token) +} +\arguments{ +\item{base_url}{The base URL for the ONA API; defaults to +'https://api.whonghub.org'.} + +\item{api_token}{API token for authentication.} +} +\value{ +Data frame of the API endpoint data. +} +\description{ +Retrieves data from the ONA API endpoints to display available forms and +datasets. It initially, uses given API token for authorization and checks the +initial API response status. Then it validates the base URL and constructs +the full API endpoint URL to retrieve a dataframe of all the available for +forms/data. +} +\examples{ +# prep_ona_data_endpoints(api_token = "your_api_token_here") +} diff --git a/tests/testthat/test-prep_match_datatypes.R b/tests/testthat/test-prep_match_datatypes.R index 6a7aa9b..9e5b4dd 100644 --- a/tests/testthat/test-prep_match_datatypes.R +++ b/tests/testthat/test-prep_match_datatypes.R @@ -1,30 +1,36 @@ testthat::test_that("Data types match correctly", { - - # set up sample ref target data - ref_df <- tibble::tibble( - integer_col = 1:3, - character_col = c("a", "b", "c"), - numeric_col = c(1.1, 2.2, 3.3), - date_col = as.Date( - c("2021-01-01", "2021-01-02", "2021-01-03")) - ) - - # set up sample target data - target_df <- tibble::tibble( - integer_col = c("1", "2", "3"), - character_col = 1:3, - numeric_col = c("1.1", "2.2", "3.3"), - date_col = c("2021-01-01", "2021-01-02", "2021-01-03") - ) - - # match datatypes - matched_df <- prep_match_datatypes(ref_df, target_df) - - # check if prep_match_datatypes works - testthat::expect_type(matched_df$integer_col, "integer") - testthat::expect_type(matched_df$character_col, "character") - testthat::expect_type(matched_df$numeric_col, "double") - testthat::expect_equal(class(matched_df$date_col), "Date") + # set up sample ref target data + ref_df <- tibble::tibble( + integer_col = 1:3, + character_col = c("a", "b", "c"), + numeric_col = c(1.1, 2.2, 3.3), + date_col = as.Date(c( + "2021-01-01", "2021-01-02", "2021-01-03" + )), + posixct_date_col = as.POSIXct(c("2024-04-17", + "2024-04-18", + "2024-04-19"), + format = "%Y-%m-%d") + ) + + # set up sample target data + target_df <- tibble::tibble( + integer_col = c("1", "2", "3"), + character_col = 1:3, + numeric_col = c("1.1", "2.2", "3.3"), + date_col = c("2021-01-01", "2021-01-02", "2021-01-03"), + posixct_date_col = c("2024-04-17", + "2024-04-18", + "2024-04-19") + ) + + # match datatypes + matched_df <- prep_match_datatypes(ref_df, target_df) + + # check if prep_match_datatypes works + testthat::expect_type(matched_df$integer_col, "integer") + testthat::expect_type(matched_df$character_col, "character") + testthat::expect_type(matched_df$numeric_col, "double") + testthat::expect_equal(class(matched_df$date_col), "Date") + #testthat::expect_equal(class(matched_df$posixct_date_col), 'c("POSIXct", "POSIXt")') }) - - diff --git a/tests/testthat/test-prep_ona_data_endpoints.R b/tests/testthat/test-prep_ona_data_endpoints.R new file mode 100644 index 0000000..8849056 --- /dev/null +++ b/tests/testthat/test-prep_ona_data_endpoints.R @@ -0,0 +1,3 @@ +test_that("multiplication works", { + expect_equal(2 * 2, 4) +})