Skip to content

Commit

Permalink
Refactor get_updated_ona_data
Browse files Browse the repository at this point in the history
  • Loading branch information
truenomad committed Feb 6, 2025
1 parent f5bd779 commit 5d73664
Showing 1 changed file with 62 additions and 57 deletions.
119 changes: 62 additions & 57 deletions R/get_ona.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ check_status_api <- function(response) {
call. = FALSE
)
}

# get response code
response_status_code <- httr::status_code(response)
# get http code
code <- httpcode::http_code(response_status_code)

if (code$status_code != 200) {
stop(
glue::glue(
Expand Down Expand Up @@ -63,31 +63,31 @@ prep_ona_data_endpoints <- function(
} else {
stop(paste("Error: ", base_url, " is not a valid base url"))
}

# set up URL
api_url <- paste0(base_url, "/api/v1/data")

# Validate base url link first -----------------------------------------------

# get head before download
response <- httr::HEAD(
base_url,
config = httr::add_headers(Authorization = paste("Token", api_token))
)

# check status of call
check_status_api(response)

# Validate url link before downloading ---------------------------------------

# get one data endpoint df
response <- httr::GET(
api_url,
config = httr::add_headers(Authorization = paste("Token", api_token))
) |>
httr::content("text", encoding = "UTF-8") |>
jsonlite::fromJSON(simplifyDataFrame = TRUE)

return(response)
}

Expand Down Expand Up @@ -126,24 +126,24 @@ prep_ona_data_endpoints <- function(
process_comparison_filters <- function(...) {
filters <- list(...)
converted_filters <- list()

for (filter in filters) {
# Check if the filter is a formula
if (inherits(filter, "formula")) {
filter_expr <- filter[[2]]

# If the formula contains multiple conditions combined with &
if (filter_expr[[1]] == as.name("&")) {
# Initialize a temporary list to hold conditions for the same field
field_conditions <- list()

# Process each condition individually
sub_conditions <- as.list(filter_expr)[-1]
for (cond in sub_conditions) {
op <- as.character(cond[[1]])
field_name <- as.character(cond[[2]])
value <- as.character(cond[[3]])

# Map R operators to MongoDB operators
operator_map <- list(
">" = "$gt",
Expand All @@ -154,19 +154,19 @@ process_comparison_filters <- function(...) {
"!=" = "$ne"
)
mongo_op <- operator_map[[op]]

# Add each condition to the field's condition list
field_conditions[[mongo_op]] <- value
}

# Assign combined conditions to the field name in the main filter
converted_filters[[field_name]] <- field_conditions
} else {
# Handle single comparison formulas
operator <- as.character(filter_expr[[1]])
field_name <- as.character(filter_expr[[2]])
value <- as.character(filter_expr[[3]])

# Map operators
operator_map <- list(
">" = "$gt",
Expand All @@ -177,23 +177,23 @@ process_comparison_filters <- function(...) {
"!=" = "$ne"
)
mongo_operator <- operator_map[[operator]]

# Single condition with correct field name
converted_filters[[field_name]] <- setNames(list(value), mongo_operator)
}

# Handle vector filters as simple arrays without $in
} else if (is.vector(filter) && length(filter) > 1) {
field_name <- names(filter)[1]
converted_filters[[field_name]] <- filter[[1]]

# Handle single-value fields
} else {
field_name <- names(filter)[1]
converted_filters[[field_name]] <- filter[[1]]
}
}

# Convert to JSON format
jsonlite::toJSON(converted_filters, auto_unbox = TRUE)
}
Expand All @@ -220,19 +220,19 @@ process_comparison_filters <- function(...) {
process_logical_filters <- function(filters) {
# List of logical operators
logical_operators <- c("$and", "$or", "$nor", "$not")

# Check if filters already contain logical operators
if (any(names(filters) %in% logical_operators)) {
# Return filters as-is
return(filters)
}

single_conditions <- list()
or_conditions <- list()

for (field_name in names(filters)) {
field_values <- filters[[field_name]]

if (length(field_values) == 1) {
# Single value, add directly
single_conditions[[field_name]] <- field_values
Expand All @@ -245,7 +245,7 @@ process_logical_filters <- function(filters) {
}
}
}

if (length(or_conditions) > 0 && length(single_conditions) > 0) {
# Combine single conditions and or conditions with '$and'
final_query <- list("$and" = c(
Expand All @@ -259,9 +259,9 @@ process_logical_filters <- function(filters) {
# Only single conditions
final_query <- single_conditions
}

jsonlite::toJSON(final_query,
auto_unbox = TRUE
auto_unbox = TRUE
)
}

Expand Down Expand Up @@ -317,22 +317,22 @@ get_ona_form <- function(base_url = "https://api.whonghub.org",
comparison_filters = NULL) {
# Check base URL validity
base_url <- validate_base_url(base_url)

# Set up API URL
api_url <- paste0(base_url, "/api/v1/data/", form_id)

# Start the CLI process
process_id <- cli::cli_process_start(
paste0("Getting form '", form_id, "' from ONA server")
)
cat("\n")

# Check if the form id is available for download
resp_data <- prep_ona_data_endpoints(
base_url = base_url,
api_token = api_token
)

if (!(form_id %in% unique(resp_data$id))) {
cli::cli_abort(
paste0(
Expand All @@ -343,27 +343,27 @@ get_ona_form <- function(base_url = "https://api.whonghub.org",
)
)
}

# Initialize query parameters list
query_params <- list()

# Convert selected_columns to JSON array string
if (!is.null(selected_columns)) {
fields_json <- jsonlite::toJSON(selected_columns, auto_unbox = FALSE)
} else {
fields_json <- NULL
}

# Process filters and combine them if both are present
query_json <- if (!is.null(logical_filters) && !is.null(comparison_filters)) {
comparison <- process_comparison_filters(comparison_filters) |>
jsonlite::minify() |>
gsub("^\\{|\\}$", "", x = _)

logical <- process_logical_filters(logical_filters) |>
jsonlite::minify() |>
gsub("^\\{|\\}$", "", x = _)

paste0("{", comparison, ",", logical, "}") |>
jsonlite::minify()
} else if (!is.null(comparison_filters)) {
Expand All @@ -373,7 +373,7 @@ get_ona_form <- function(base_url = "https://api.whonghub.org",
} else {
NULL
}

# Build the full URL with query parameters
full_url <- httr::modify_url(
api_url,
Expand All @@ -382,24 +382,24 @@ get_ona_form <- function(base_url = "https://api.whonghub.org",
query = query_json
)
)

# Download data (use pagination if necessary) --------------------------------

results <- get_paginated_data(full_url, api_token) |>
# Drop any empty columns
dplyr::select(
dplyr::where(
~ any(!is.na(.))
)
)

# Finish the CLI process
cat("\n")
cli::cli_process_done(
process_id,
msg_done = "Download complete! {praise_emoji()}"
)

return(results)
}

Expand Down Expand Up @@ -467,7 +467,7 @@ get_paginated_data <- function(api_url, api_token) {
api_limit <- 100000
results <- list()
page_number <- 1

repeat {
# Append page and page_size parameters
paged_url <- paste0(
Expand All @@ -476,18 +476,18 @@ get_paginated_data <- function(api_url, api_token) {
"page=", page_number,
"&page_size=100000"
)

current_page <- get_ona_page(paged_url, api_token) |>
as.data.frame() |>
dplyr::mutate(dplyr::across(tidyselect::everything(), as.character))

results <- dplyr::bind_rows(results, current_page)

if (nrow(current_page) < api_limit || nrow(current_page) == 0) break

page_number <- page_number + 1
}

return(dplyr::distinct(results))
}

Expand All @@ -507,7 +507,7 @@ praise_emoji <- function() {
if (!cli::is_utf8_output()) {
return("")
}

emoji <- c(
"\U0001f600",
"\U0001f973",
Expand Down Expand Up @@ -542,7 +542,7 @@ get_multi_ona_data <- function(base_url = "https://api.whonghub.org",
details =
"Use `get_ona_data()` which now supports both single and multiple form IDs"
)

get_ona_data(
base_url = base_url,
form_ids = form_ids,
Expand Down Expand Up @@ -667,7 +667,7 @@ generate_urls <- function(full_data, file_path,
)
) |>
dplyr::pull(date_last_updated)

# Construct URL with last edited parts
paste0(
base_url, "/api/v1/data/", form_id,
Expand All @@ -682,7 +682,7 @@ generate_urls <- function(full_data, file_path,
)
}
})

urls
}

Expand Down Expand Up @@ -749,6 +749,8 @@ call_urls <- function(urls, api_token) {
#' It should be a character vector, e.g. c("year", "form_id").
#' @param data_file_name The base name for the data file, defaults to
#' "my_ona_data".
#' @param return_results An option to return the full results as an output,
#' in addition to the data being saved. Default is FALSE.
#'
#' @return Returns a data frame containing the combined new and existing data.
#'
Expand All @@ -761,7 +763,8 @@ get_updated_ona_data <- function(base_url = "https://api.whonghub.org",
form_ids, api_token,
log_results = TRUE, file_path = NULL,
selected_columns = NULL,
data_file_name = "my_ona_data") {
data_file_name = "my_ona_data",
return_results = FALSE) {
# check base url validity
base_url <- validate_base_url(base_url)

Expand Down Expand Up @@ -930,12 +933,13 @@ get_updated_ona_data <- function(base_url = "https://api.whonghub.org",
log_data <- log_messages
}

rownames(log_data) <- NULL

# Save log file
poliprep::save(
janitor::clean_names(log_data), log_file_name
)


cat("\n")
if (nrow(new_data) > 0) {
cli::cli_alert_success(
Expand All @@ -948,13 +952,14 @@ get_updated_ona_data <- function(base_url = "https://api.whonghub.org",
} else {
cli::cli_alert_info("No new data available. Everything is up to date.")
}

}

# Return output message and save results -------------------------------------

# save full data
poliprep::save(full_data, file_name)
if (nrow(full_data_orig) == 0 || nrow(new_data) > 0) {
# save full data
poliprep::save(full_data, file_name)
}

return(full_data)
if (return_results) { return(full_data) }
}

0 comments on commit 5d73664

Please sign in to comment.