Merge pull request #9 from WorldHealthOrganization/feature_dev

Feature dev
WorldHealthOrganization · Apr 29, 2024 · cd65a3d · cd65a3d
2 parents 5f376cf + 35c4401
commit cd65a3d
Show file tree

Hide file tree

Showing 9 changed files with 235 additions and 47 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -30,7 +30,8 @@ Imports:
   jsonlite,
   tidyselect,
   httpcode,
-  tibble
+  tibble,
+  pbmcapply
 Suggests: 
   testthat,
   withr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,9 +1,11 @@
 # Generated by roxygen2: do not edit by hand
 
+export(get_multi_ona_data)
 export(get_ona_data)
 export(prep_geonames)
 export(prep_match_datatypes)
 export(prep_match_names)
+export(prep_ona_data_endpoints)
 export(read)
 export(save)
 importFrom(cli,cli_alert_warning)

diff --git a/R/get_ona.R b/R/get_ona.R
@@ -32,6 +32,59 @@ check_status_api <- function(response) {
   }
 }
 
+#' Fetch ONA API Data
+#'
+#' Retrieves data from the ONA API endpoints to display available forms and 
+#' datasets. It initially, uses given API token for authorization and checks the 
+#' initial API response status. Then it validates the base URL and constructs 
+#' the full API endpoint URL to retrieve a dataframe of all the available for 
+#' forms/data.
+#'
+#' @param base_url The base URL for the ONA API; defaults to 
+#'            'https://api.whonghub.org'.
+#' @param api_token API token for authentication.
+#'
+#' @return Data frame of the API endpoint data.
+#' 
+#' @export  
+#' @examples
+#' # prep_ona_data_endpoints(api_token = "your_api_token_here")
+prep_ona_data_endpoints <- function(
+    base_url = "https://api.whonghub.org", api_token) {
+
+  # check base url validity
+  base_url_pattern <- "^(https?://[^/]+).*"
+  if ( grepl(base_url_pattern, base_url)){
+    base_url <- sub(base_url_pattern, "\\1", base_url)
+  } else {
+    stop(paste("Error: ", base_url, " is not a valid base url"))
+  }
+
+  # set up URL
+  api_url <- paste0(base_url,"/api/v1/data")
+
+  # Validate base url link first -----------------------------------------------
+
+  # get head before download
+  response <- httr::HEAD(
+    base_url, 
+    config = httr::add_headers(Authorization = paste("Token", api_token)))
+
+  # check status of call
+  check_status_api(response)
+
+  # Validate url link before downloading ---------------------------------------
+
+  # get one data endpoint df 
+  response <- httr::GET(
+    api_url, 
+    config = httr::add_headers(Authorization = paste("Token", api_token))) |> 
+    httr::content("text", encoding = "UTF-8") |> 
+    jsonlite::fromJSON(simplifyDataFrame = TRUE)
+
+  return(response)
+}
+
 #' Get a Page of Data from an API
 #'
 #' This function retrieves a single page of data from a specified API endpoint.
@@ -118,15 +171,20 @@ get_ona_data <- function(
   get_next_page <- TRUE
   start <- 0
 
-  # Validate url link before downloading ---------------------------------------
-
-  # get head before download
-  response <- httr::HEAD(
-    api_url, 
-    config = httr::add_headers(Authorization = paste("Token", api_token)))
+  # Check if the form id is available for download -----------------------------
 
-  # check status of call
-  check_status_api(response)
+  resp_data <- prep_ona_data_endpoints(
+    base_url = base_url,
+    api_token = api_token)
+
+  if (!(form_id %in% unique(resp_data$id))) {
+    cli::cli_abort(
+      paste0("Form IDs ", 
+             toString(form_id), 
+             " not found. Use `prep_ona_data_endpoints()` ",
+             "to check available forms for download.")
+    )
+  }
 
   # Download data (use pagination if necessary) --------------------------------
 
@@ -172,3 +230,58 @@ get_ona_data <- function(
 
   return(results)
 }
+
+#' Get Data from ONA for Multiple Forms
+#'
+#' This function retrieves data for a specified form from the ONA API using a 
+#' provided API token and constructs a unique key for each dataset. It returns 
+#' the data in a structured format if the request is successful.
+#'
+#' @param base_url The base URL for the ONA API; defaults to 
+#'            'https://api.whonghub.org'.
+#' @param form_ids A vector containing form id number to identify each form.
+#' @param api_token A string specifying the API token for ONA.
+#'
+#' @return A data frame containing the combined data from all specified form 
+#'        IDs, and includes from_id column.
+#' @examples
+#' # api_token <- "your_api_token_here"
+#' # data <- get_multi_ona_data(form_ids = c(623, 432, 643), api_token)
+#' @export
+get_multi_ona_data <- function(
+    base_url = "https://api.whonghub.org", form_ids, api_token) {
+
+  # Check if the form IDs are available for download ---------------------------
+  resp_data <- prep_ona_data_endpoints(
+    base_url = base_url,
+    api_token = api_token
+  )
+
+  if (!all(form_ids %in% unique(resp_data$id))) {
+    missing_ids <- form_ids[!form_ids %in% unique(resp_data$id)]
+    cli::cli_abort(
+      paste0("Form IDs ", 
+             toString(missing_ids), 
+             " not found. Use `prep_ona_data_endpoints()` ",
+             "to check available forms for download.")
+    )
+  }
+
+  # Fetch data in parallel for each form ID ------------------------------------
+  results_list <- pbmcapply::pbmclapply(
+    form_ids,
+    function(form_id) {
+      get_ona_data(form_id = form_id, api_token = api_token)
+    },
+    mc.cores = parallel::detectCores() - 1
+  )
+
+  # Set names for each element in the results list to match form_ids
+  names(results_list) <- as.character(form_ids)
+
+  # Combine all into one df
+  combined_data <- dplyr::bind_rows(results_list, .id = "form_id")
+
+  return(combined_data)
+}
+
diff --git a/R/prep_match_datatypes.R b/R/prep_match_datatypes.R
@@ -47,13 +47,20 @@ prep_match_datatypes <- function(ref_dataframe, target_dataframe) {
         target_dataframe <- target_dataframe |>
           dplyr::mutate(!!col := as.logical(!!rlang::sym(col)))
       } else if (col_type == "Date") {
-
         target_dataframe <- target_dataframe |>
           dplyr::mutate(!!col := as.Date(!!rlang::sym(col)
-                                         # format = "Y-%m-%d"
           ))
-      }
+      } #else if (col_type == 'c("POSIXct", "POSIXt")') {
+      #   target_dataframe <- target_dataframe |>
+      #     dplyr::mutate(!!col == as.POSIXct(!!rlang::sym(col),
+      #                                       format = "%Y-%m-%d"))
+      # }  else if (col_type == 'c("POSIXlt", "POSIXt")') {
+      #   target_dataframe <- target_dataframe |>
+      #     dplyr::mutate(!!col == as.POSIXlt(!!rlang::sym(col),
+      #                                       format = "%Y-%m-%d"))
+      # }
     }
   }
   return(target_dataframe)
 }
+
diff --git a/R/prep_match_names.R b/R/prep_match_names.R
@@ -73,23 +73,23 @@ prep_match_names <- function(ref_dataframe, target_dataframe, report = TRUE) {
       setdiff(names(target_dataframe), names(ref_dataframe))
     if (length(no_match_cols) > 0) {
       print(
-        paste(
-          "No match found for ",
-          no_match_cols,
-          ". initial name maintained. A total of ",
           length(names(no_match_cols)),
           "/",
           length(names(no_match_cols)),
           " columns from the target dataframe were matched to the reference dataframe"
         )
-      )
+      print(
+          "No match found for ",
+          no_match_cols,
+          ". initial name maintained. A total of "
+        )
     } else{
       print(
         paste(
           "All columns were successfully matched and renamed. A total of ",
-          length(names(common_cols)),
+          length(common_cols),
           "/",
-          length(names(common_cols)),
+          length(common_cols),
           " columns from the target dataframe were matched to the reference dataframe"
         )
       )

diff --git a/man/get_multi_ona_data.Rd b/man/get_multi_ona_data.Rd
diff --git a/man/prep_ona_data_endpoints.Rd b/man/prep_ona_data_endpoints.Rd
diff --git a/tests/testthat/test-prep_match_datatypes.R b/tests/testthat/test-prep_match_datatypes.R
@@ -1,30 +1,36 @@
 testthat::test_that("Data types match correctly", {
-
-    # set up sample ref target data
-    ref_df <- tibble::tibble(
-        integer_col = 1:3,
-        character_col = c("a", "b", "c"),
-        numeric_col = c(1.1, 2.2, 3.3),
-        date_col = as.Date(
-            c("2021-01-01", "2021-01-02", "2021-01-03"))
-    )
-
-    # set up sample target data
-    target_df <- tibble::tibble(
-        integer_col = c("1", "2", "3"),
-        character_col = 1:3,
-        numeric_col = c("1.1", "2.2", "3.3"),
-        date_col = c("2021-01-01", "2021-01-02", "2021-01-03")
-    )
-
-    # match datatypes
-    matched_df <- prep_match_datatypes(ref_df, target_df)
-
-    # check if prep_match_datatypes works
-    testthat::expect_type(matched_df$integer_col, "integer")
-    testthat::expect_type(matched_df$character_col, "character")
-    testthat::expect_type(matched_df$numeric_col, "double")
-    testthat::expect_equal(class(matched_df$date_col), "Date")
+  # set up sample ref target data
+  ref_df <- tibble::tibble(
+    integer_col = 1:3,
+    character_col = c("a", "b", "c"),
+    numeric_col = c(1.1, 2.2, 3.3),
+    date_col = as.Date(c(
+      "2021-01-01", "2021-01-02", "2021-01-03"
+    )),
+    posixct_date_col = as.POSIXct(c("2024-04-17",
+                                    "2024-04-18",
+                                    "2024-04-19"),
+                                  format = "%Y-%m-%d")
+  )
+
+  # set up sample target data
+  target_df <- tibble::tibble(
+    integer_col = c("1", "2", "3"),
+    character_col = 1:3,
+    numeric_col = c("1.1", "2.2", "3.3"),
+    date_col = c("2021-01-01", "2021-01-02", "2021-01-03"),
+    posixct_date_col = c("2024-04-17",
+                         "2024-04-18",
+                         "2024-04-19")
+  )
+
+  # match datatypes
+  matched_df <- prep_match_datatypes(ref_df, target_df)
+
+  # check if prep_match_datatypes works
+  testthat::expect_type(matched_df$integer_col, "integer")
+  testthat::expect_type(matched_df$character_col, "character")
+  testthat::expect_type(matched_df$numeric_col, "double")
+  testthat::expect_equal(class(matched_df$date_col), "Date")
+  #testthat::expect_equal(class(matched_df$posixct_date_col), 'c("POSIXct", "POSIXt")')
 })
-
-
diff --git a/tests/testthat/test-prep_ona_data_endpoints.R b/tests/testthat/test-prep_ona_data_endpoints.R
@@ -0,0 +1,3 @@
+test_that("multiplication works", {
+  expect_equal(2 * 2, 4)
+})