diff --git a/.github/workflows/lint-changed-files.yaml b/.github/workflows/lint-changed-files.yaml new file mode 100644 index 00000000..55cc0904 --- /dev/null +++ b/.github/workflows/lint-changed-files.yaml @@ -0,0 +1,44 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + pull_request: + branches: [main, master] + +name: lint-changed-files + +jobs: + lint-changed-files: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-r@v2 + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + any::gh + any::lintr + any::purrr + needs: check + + - name: Add lintr options + run: | + cat('\noptions(lintr.linter_file = ".lintr")\n', file = "~/.Rprofile", append = TRUE) + shell: Rscript {0} + + - name: Install package + run: R CMD INSTALL . + + - name: Extract and lint files changed by this PR + run: | + files <- gh::gh("GET https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/files") + changed_files <- purrr::map_chr(files, "filename") + all_files <- list.files(recursive = TRUE) + exclusions_list <- as.list(setdiff(all_files, changed_files)) + lintr::lint_package(exclusions = exclusions_list) + shell: Rscript {0} + env: + LINTR_ERROR_ON_LINT: true diff --git a/DESCRIPTION b/DESCRIPTION index fa020b36..bc72a3fe 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: pipapi Title: API for the Poverty and Inequality Platform -Version: 1.3.3 +Version: 1.3.3.9000 Authors@R: c(person(given = "Tony", family = "Fujs", @@ -34,7 +34,8 @@ Suggests: httr, jsonlite, future.callr, - future.apply + future.apply, + lintr Language: en-US Imports: data.table, diff --git a/NEWS.md b/NEWS.md index 54562a71..fcaba9a4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# pipapi (development version) +## New features +- Add new SPR and SPL indicators + +## Enhancements +- Address some linting issues +- Increase test coverage + # pipapi 1.3.3 - Fix bug with ag_average_poverty_stats - Better control of returned columns diff --git a/R/add_agg_stats.R b/R/add_agg_stats.R index 51bfb3dc..18c21a5c 100644 --- a/R/add_agg_stats.R +++ b/R/add_agg_stats.R @@ -56,9 +56,10 @@ ag_average_poverty_stats <- function(df, return_cols) { ## Only numeric variables will be aggregated or averaged ## Year variables must not be modified years_vars <- grep("year", names(df), value = TRUE) - years_vars <- years_vars[!vapply(df[, ..years_vars], + years_vars <- years_vars[!vapply(df[, .SD, .SDcols = years_vars], is.logical, FUN.VALUE = logical(1))] + df[, (years_vars) := lapply(.SD, as.character), .SDcols = years_vars] @@ -96,10 +97,6 @@ ag_average_poverty_stats <- function(df, return_cols) { # STEP 3: Calculations ---------- ## weighted average ------ - totpop <- sum(df$reporting_pop) - - wgt <- df$reporting_pop/totpop - wgt_df <- df |> # this grouping is not necessary, but ensures data.frame as output collapse::fgroup_by(c("country_code", "reporting_year", "welfare_type")) |> @@ -117,7 +114,7 @@ ag_average_poverty_stats <- function(df, return_cols) { # STEP 4: Format results ---- ## Bind resulting tables ---- - out <- cbind(df[1, ..nonum_names], wgt_df, sum_df) + out <- cbind(df[1, .SD, .SDcols = nonum_names], wgt_df, sum_df) ## convert years back to numeric ---- out[, (years_vars) := @@ -131,7 +128,7 @@ ag_average_poverty_stats <- function(df, return_cols) { out[, (national_cols) := "national"] ## set order of obs anc col ------- - out <- out[, ..orig_names] + out <- out[, .SD, .SDcols = orig_names] data.table::setcolorder(out, orig_names) data.table::setorderv(out, c("country_code", "reporting_year","welfare_type")) diff --git a/R/create_lkups.R b/R/create_lkups.R index eac7cb56..57a3e14c 100644 --- a/R/create_lkups.R +++ b/R/create_lkups.R @@ -264,10 +264,12 @@ create_lkups <- function(data_dir, versions) { 'reporting_pce', 'is_interpolated', 'distribution_type', - 'estimation_type'#, - # 'spl', - # 'spr' + 'estimation_type', + 'spl', + 'spr' ), + + # This is not used anywhere anymore. dist_stats = c( "country_code", "reporting_year", @@ -289,16 +291,16 @@ create_lkups <- function(data_dir, versions) { "poverty_severity", "watts", "mean", - "pop_in_poverty"#, - #"spr" + "pop_in_poverty", + "spr" ), weighted_average_cols = c( "headcount", "poverty_gap", "poverty_severity", "watts", - "mean"#, - #"spr" + "mean", + "spr" ) ), ui_pc_charts = list( @@ -337,7 +339,8 @@ create_lkups <- function(data_dir, versions) { 'poverty_gap', 'poverty_severity', 'watts', - 'pop_in_poverty'#, 'spr' + 'pop_in_poverty', + 'spr' ), inequality_indicators = c( 'median', @@ -363,8 +366,8 @@ create_lkups <- function(data_dir, versions) { "headcount", "poverty_gap", "poverty_severity", - "watts"#, - #"spr" + "watts", + "spr" ), zero_vars = c( "mean", diff --git a/R/fg_pip.R b/R/fg_pip.R index 11e2126d..0d86f64c 100644 --- a/R/fg_pip.R +++ b/R/fg_pip.R @@ -119,29 +119,10 @@ fg_pip <- function(country, out[, poverty_line := round(poverty_line, digits = 3) ] - - # add SPL --------- - - data_dir <- lkup$data_root - spl <- - get_aux_table(data_dir = data_dir, - table = "spl") - - out <- merge.data.table( - x = out, - y = spl, - by = c( - "country_code", - "reporting_year", - "welfare_type", - "reporting_level" - ), - all.x = TRUE - ) - - if (any(names(out) == "spl_headcount")) { - data.table::setnames(out, "spl_headcount", "spr") - } + # Add SPL and SPR --------------- + out <- add_spl(df = out, + fill_gaps = TRUE, + data_dir = lkup$data_root) return(out) } diff --git a/R/get_aux_table.R b/R/get_aux_table.R index c2f82b60..24b43d8e 100644 --- a/R/get_aux_table.R +++ b/R/get_aux_table.R @@ -19,6 +19,7 @@ get_aux_table <- function(data_dir, table, long_format = FALSE) { # Strip all "non-word" characters from user input sanitized_table <- gsub("\\W", "", table) + out <- fst::read_fst(sprintf( "%s/_aux/%s.fst", data_dir, @@ -29,7 +30,7 @@ get_aux_table <- function(data_dir, table, long_format = FALSE) { out <- data.table::melt(out, id.vars = c('country_code', 'data_level'), variable.name = "year") - data.table::setorder(out, country_code, year, data_level) + data.table::setorder(out, "country_code", "year", "data_level") } return(out) diff --git a/R/get_param_values.R b/R/get_param_values.R index d1b8e1ce..ae30c26b 100644 --- a/R/get_param_values.R +++ b/R/get_param_values.R @@ -12,7 +12,7 @@ get_param_values <- function(lkup, "pip-grp", "pip-info", "valid-params")) { - endpoint <- endpoint[1] # Ensure it only passes one endpoint at a time + # endpoint <- endpoint[1] # Ensure it only passes one endpoint at a time endpoint <- match.arg(endpoint) # TO IMPROVE: Too much hard-coding here endpoint_map <- c("all", diff --git a/R/get_pip_version.R b/R/get_pip_version.R index 7d459e19..accd4819 100644 --- a/R/get_pip_version.R +++ b/R/get_pip_version.R @@ -6,7 +6,7 @@ #' @export get_pip_version <- function(pip_packages = c("pipapi", "wbpip"), - data_versions = lkups$versions) { + data_versions) { # PIP package versions core_packages <- lapply(pip_packages, function(x){ diff --git a/R/pip.R b/R/pip.R index 23a8567d..e266e1d6 100644 --- a/R/pip.R +++ b/R/pip.R @@ -68,6 +68,8 @@ pip <- function(country = "ALL", additional_ind = FALSE) { + # set up ------------- + welfare_type <- match.arg(welfare_type) reporting_level <- match.arg(reporting_level) group_by <- match.arg(group_by) @@ -83,6 +85,7 @@ pip <- function(country = "ALL", stop("You are probably passing more than one dataset as lkup argument. Try passing a single one by subsetting it lkup <- lkups$versions_paths$dataset_name_PROD") + # **** TO BE REMOVED **** REMOVAL STARTS HERE # Once `pip-grp` has been integrated in ingestion pipeline # Forces fill_gaps to TRUE when using group_by option @@ -92,6 +95,7 @@ pip <- function(country = "ALL", } # **** TO BE REMOVED **** REMOVAL ENDS HERE + # Countries vector ------------ lcv <- # List with countries vectors create_countries_vctr( country = country, @@ -100,9 +104,9 @@ pip <- function(country = "ALL", aux_files = lkup$aux_files ) - + # mains estimates --------------- if (fill_gaps) { - # Compute imputed stats + ## lineup years----------------- out <- fg_pip( country = lcv$est_ctrs, year = year, @@ -114,7 +118,7 @@ pip <- function(country = "ALL", lkup = lkup ) } else { - # Compute survey year stats + ## survey years ------------------ out <- rg_pip( country = lcv$est_ctrs, year = year, @@ -127,15 +131,17 @@ pip <- function(country = "ALL", ) } - # return empty dataframe if no metadata is found + # Eary return for empty table--------------- if (nrow(out) == 0) { return(out) } - # Handles aggregated distributions + # aggregate distributions ------------------ if (reporting_level %in% c("national", "all")) { - out <- add_agg_stats(out, - return_cols = lkup$return_cols$ag_average_poverty_stats) + out <- add_agg_stats( + df = out, + return_cols = lkup$return_cols$ag_average_poverty_stats + ) if (reporting_level == "national") { out <- out[reporting_level == "national"] } @@ -175,7 +181,8 @@ pip <- function(country = "ALL", } # **** TO BE REMOVED **** REMOVAL ENDS HERE - # Add pre-computed distributional statistics + + # pre-computed distributional stats --------------- crr_names <- names(out) # current variables names2keep <- lkup$return_cols$pip$cols # all variables @@ -184,14 +191,22 @@ pip <- function(country = "ALL", dist_stats = lkup[["dist_stats"]] ) + # Add aggregate medians ---------------- + out <- add_agg_medians( + df = out, + fill_gaps = fill_gaps, + data_dir = lkup$data_root + ) + + # format ---------------- + ## Inequality indicators to NA for lineup years ---- if (fill_gaps) { - # Convert inequality indicators to NA + dist_vars <- names2keep[!(names2keep %in% crr_names)] out[, (dist_vars) := NA_real_] } - - # Handle survey coverage + ## Handle survey coverage ------------ if (reporting_level != "all") { keep <- out$reporting_level == reporting_level out <- out[keep, ] @@ -228,6 +243,6 @@ pip <- function(country = "ALL", # Order rows by country code and reporting year data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type) - + # return ------------- return(out) } diff --git a/R/pip_grp.R b/R/pip_grp.R index 753a08e3..3fd02a84 100644 --- a/R/pip_grp.R +++ b/R/pip_grp.R @@ -97,7 +97,7 @@ pip_grp <- function(country = "ALL", } keep <- lkup$return_cols$pip_grp$cols - out <- out[, ..keep] + out <- out[, .SD, .SDcols = keep] return(out) } @@ -154,7 +154,7 @@ pip_aggregate <- function(df, by = NULL, return_cols) { # Handle simple aggregation - df <- df[, ..to_keep] + df <- df[, .SD, .SDcols = to_keep] byvar <- c(by, "reporting_year", "poverty_line") @@ -217,7 +217,7 @@ pip_aggregate_by <- function(df, to_keep <- all_cols[all_cols != "pop_in_poverty"] - df <- df[, ..to_keep] + df <- df[, .SD, .SDcols = to_keep] group_lkup <- group_lkup[, c("region_code", "reporting_year", diff --git a/R/pip_grp_logic.R b/R/pip_grp_logic.R index 6091b433..d69008af 100644 --- a/R/pip_grp_logic.R +++ b/R/pip_grp_logic.R @@ -193,8 +193,8 @@ pip_grp_logic <- function(country = "ALL", # Find common variables common_vars <- intersect(names(fdt), names(mdt)) - fdt <- fdt[, ..common_vars] - mdt <- mdt[, ..common_vars] + fdt <- fdt[, .SD, .SDcols = common_vars] + mdt <- mdt[, .SD, .SDcols = common_vars] ## Append with countries with missing data ----- l_fg[[i]] <- data.table::rbindlist(list(fdt, mdt), @@ -219,7 +219,7 @@ pip_grp_logic <- function(country = "ALL", de <- data.table::rbindlist(ld, use.names = TRUE) rm(ld) - vars_to_keep <- names(off_ret) + # vars_to_keep <- names(off_ret) # de[, ] # Append official regions with Alt aggregates --------- @@ -312,7 +312,7 @@ pip_grp_helper <- function(lcv_country, } keep <- lkup$return_cols$pip_grp$cols - out <- out[, ..keep] + out <- out[, .SD, .SDcols = keep] return(out) } diff --git a/R/rg_pip.R b/R/rg_pip.R index 8bdf031d..dc6b05fe 100644 --- a/R/rg_pip.R +++ b/R/rg_pip.R @@ -51,16 +51,16 @@ rg_pip <- function(country, ) tmp_stats <- wbpip:::prod_compute_pip_stats( - welfare = svy_data$df0$welfare, - povline = povline, - popshare = popshare, - population = svy_data$df0$weight, - requested_mean = tmp_metadata$survey_mean_ppp, - svy_mean_lcu = tmp_metadata$survey_mean_lcu, - svy_median_lcu = tmp_metadata$survey_median_lcu, - svy_median_ppp = tmp_metadata$survey_median_ppp, - default_ppp = tmp_metadata$ppp, - ppp = ppp, + welfare = svy_data$df0$welfare, + povline = povline, + popshare = popshare, + population = svy_data$df0$weight, + requested_mean = tmp_metadata$survey_mean_ppp, + svy_mean_lcu = tmp_metadata$survey_mean_lcu, + svy_median_lcu = tmp_metadata$survey_median_lcu, + svy_median_ppp = tmp_metadata$survey_median_ppp, + default_ppp = tmp_metadata$ppp, + ppp = ppp, distribution_type = tmp_metadata$distribution_type ) @@ -73,22 +73,10 @@ rg_pip <- function(country, } out <- data.table::rbindlist(out) - - # Add SPL ------------ - keep <- lkup$return_cols$pip$dist_stats - spl <- lkup$dist_stats[, ..keep] - - out <- merge.data.table( - x = out, - y = spl, - by = c( - "country_code", - "reporting_year", - "welfare_type", - "reporting_level" - ), - all.x = TRUE - ) + # Add SPL and SPR --------------- + out <- add_spl(df = out, + fill_gaps = FALSE, + data_dir = lkup$data_root) return(out) } diff --git a/R/ui_poverty_indicators.R b/R/ui_poverty_indicators.R index 4960f5ac..5410c8d3 100644 --- a/R/ui_poverty_indicators.R +++ b/R/ui_poverty_indicators.R @@ -38,12 +38,12 @@ ui_pc_charts <- function(country = c("AGO"), # handle different responses when fill_gaps = TRUE / FALSE if (fill_gaps == FALSE) { # Return all columns when survey years are requested - out <- out[, ..return_cols] + out <- out[, .SD, .SDcols = return_cols] return(out) } else { # Set non-interpolated variables to NA if line-up years are requested - out <- out[, ..return_cols] + out <- out[, .SD, .SDcols = return_cols] out[, inequality_indicators] <- NA return(out) } diff --git a/R/utils-plumber.R b/R/utils-plumber.R index 782a9fa6..8c9ebfa2 100644 --- a/R/utils-plumber.R +++ b/R/utils-plumber.R @@ -387,8 +387,8 @@ pi_version <- function(ppp_version, identity, versions_available) { citation_from_version <- function(version) { current_date <- Sys.Date() current_year <- format(current_date, '%Y') - release_date <- extract_release_date(version) - ppp_date <- extract_ppp_date(version) + # release_date <- extract_release_date(version) + # ppp_date <- extract_ppp_date(version) citation <- sprintf('World Bank (%s), Poverty and Inequality Platform (version %s) [data set]. pip.worldbank.org. Accessed on %s', current_year, version, @@ -409,12 +409,13 @@ citation_from_version <- function(version) { #' to facilitate caching of PIP API responses #' #' @param req R6 object: Plumber API request +#' @param lkups list: pipapi master lkups #' #' @return character #' #' @export -create_etag_header <- function(req){ +create_etag_header <- function(req, lkups){ lkup_hash <- lkups$versions_paths[[req$argsQuery$version]] pipapi_hash <- packageDescription("pipapi")$GithubSHA1 wbpip_hash <- packageDescription("wbpip")$GithubSHA1 diff --git a/R/utils.R b/R/utils.R index 6a7b8660..670682cd 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,717 +1,872 @@ -utils::globalVariables( - c( - ".", "cache_id", "country_code", "cpi", "decile1", - "decile10", "decile2", "decile3", "decile4", - "decile5", "decile6", "decile7", "decile8", - "decile9", "distribution_type", "gini", - "headcount", "interpolation_id", - "is_interpolated", "median", "mld", - "polarization", "pop", "reporting_level", - "pop_in_poverty", "poverty_gap", - "poverty_line", "poverty_severity", - "ppp", "region_code", "reporting_pop", - "reporting_year", "survey_comparability", - "survey_coverage", "survey_mean_lcu", - "survey_mean_ppp", "survey_year", "watts", - "wb_region_code", "weighted.mean", - "welfare_type", "pcn_region_code", - "comparable_spell","..cols", "N", "check", - "data_interpolation_id", "display_cp", "region_name", - "sessionInfo" - ) -) - - -#' Subset look-up data -#' @inheritParams pip -#' @param valid_regions character: List of valid region codes that can be used -#' for region selection -#' @return data.frame -#' @keywords internal -subset_lkup <- function(country, - year, - welfare_type, - reporting_level, - lkup, - valid_regions) { - - # STEP 1 - Keep every row by default - keep <- rep(TRUE, nrow(lkup)) - # STEP 2 - Select countries - keep <- select_country(lkup, keep, country, valid_regions) - # STEP 3 - Select years - keep <- select_years(lkup, keep, year, country) - # STEP 4 - Select welfare_type - if (welfare_type[1] != "all") { - keep <- keep & lkup$welfare_type == welfare_type - } - # STEP 5 - Select reporting_level - keep <- select_reporting_level(lkup = lkup, - keep = keep, - reporting_level = reporting_level[1]) - - lkup <- lkup[keep, ] - - return(lkup) -} - -#' Helper to filter metadata -#' aggregate distribution need to be filtered out when popshare is not null -#' This is a temporary function until a full fix is implemented, and popshare is -#' supported for all distributions -#' -#' @param metadata data.frame: Output of `subset_lkup()` -#' @param popshare numeric: popshare value passed to `pip()` -#' -#' @return data.frame - -filter_lkup <- function(metadata, - popshare) { - # popshare option not supported for aggregate distributions - if (!is.null(popshare)) { - return( - metadata[metadata$distribution_type != "aggregate", ] - ) - } else { - return(metadata) - } - -} - - -#' helper function to correctly filter look up table according to requested -#' reporting level -#' -#' @param lkup data.table: Main lookup table -#' @param keep logical: Logical vector of rows to be kept -#' @param reporting_level character: Requested reporting level -#' -#' @return data.table -#' @export -#' -select_reporting_level <- function(lkup, - keep, - reporting_level) { - # To be updated: Fix the coverage variable names in aux data (reporting_coverage?) - if (reporting_level == "all") { - return(keep) - - } else if (reporting_level == "national") { - # Subnational levels necessary to compute national stats for aggregate distributions - keep <- keep & (lkup$reporting_level == reporting_level | lkup$is_used_for_aggregation) - return(keep) - - } else { - if ("survey_coverage" %in% names(lkup)) { - keep <- keep & - (lkup$survey_coverage == reporting_level | - lkup$reporting_level == reporting_level) - } else { - # This condition is not triggered - keep <- keep & lkup$reporting_level == reporting_level - } - return(keep) - } -} - - -#' Read survey data -#' -#' @param svy_id character: Survey ID -#' @param reporting_level character: geographical reporting level -#' @param path character: Path to survey data -#' -#' @return data.frame -#' @keywords internal -get_svy_data <- function(svy_id, - reporting_level, - path) { - # Each call should be made at a unique reporting_level (equivalent to reporting_data_level: national, urban, rural) - # This check should be conducted at the data validation stage - reporting_level <- unique(reporting_level) - assertthat::assert_that(length(reporting_level) == 1, - msg = "Problem with input data: Multiple reporting_levels" - ) - # tictoc::tic("read_single") - out <- lapply(path, function(x) { - - if (reporting_level %in% c("urban", "rural")) { # Not robust. Should not be hard coded here. - tmp <- fst::read_fst(x) - tmp <- tmp[tmp$area == reporting_level, ] - tmp <- tmp[, c("welfare", "weight")] - } else { - tmp <- fst::read_fst(x, columns = c("welfare", "weight")) - } - - return(tmp) - }) - - # Logging - # end_read_single <- tictoc::toc(quiet = TRUE) - # logger::log_info('read_single: {svy_id} {round(end_read_single$toc - end_read_single$tic, digits = getOption("digits", 6))}') - - names_out <- sprintf( - "df%s", - seq_along(svy_id) - 1 - ) - names(out) <- names_out - - return(out) -} - - -#' Add pre-computed distributional stats -#' -#' @param df data.table: Data frame of poverty statistics -#' @param dist_stats data.table: Distributional stats lookup -#' -#' @return data.table -#' @export -#' -add_dist_stats <- function(df, dist_stats) { - # Keep only relevant columns - cols <- c( - "cache_id", - # "country_code", - # "reporting_year", - # "welfare_type", - "reporting_level", - "gini", - "polarization", - "mld", - sprintf("decile%s", 1:10) - ) - dist_stats <- dist_stats[, .SD, .SDcols = cols] - - # merge dist stats with main table - # data.table::setnames(dist_stats, "survey_median_ppp", "median") - - df <- dist_stats[df, - on = .(cache_id, reporting_level), #.(country_code, reporting_year, welfare_type, reporting_level), - allow.cartesian = TRUE - ] - - return(df) -} - -#' Collapse rows -#' @return data.table -#' @noRd -collapse_rows <- function(df, vars, na_var = NULL) { - tmp_vars <- lapply(df[, .SD, .SDcols = vars], unique, collapse = "|") - tmp_vars <- lapply(tmp_vars, paste, collapse = "|") - tmp_var_names <- names(df[, .SD, .SDcols = vars]) - - if (!is.null(na_var)) df[[na_var]] <- NA_real_ - - for (tmp_var in seq_along(tmp_vars)) { - df[[tmp_var_names[tmp_var]]] <- tmp_vars[[tmp_var]] - } - - df <- unique(df) - return(df) -} - -#' Censor rows -#' Censor statistics based on a pre-defined censor table. -#' @param df data.table: Table to censor. Output from `pip()`. -#' @param censored list: List with censor tables. -#' @param type character: Type of censor table to use. Either countries or regions. -#' @return data.table -#' @noRd -censor_rows <- function(df, censored, type = c("countries", "regions")) { - - type <- match.arg(type) - - # Return early if there are no censoring observations - # if (nrow(censored[[type]]) == 0) { - # return(df) - # } - - # Create tmp_id to match with censor table - if (type == "countries") { - df$tmp_id <- - sprintf( - "%s_%s_%s_%s_%s", - df$country_code, df$reporting_year, - df$survey_acronym, df$welfare_type, - df$reporting_level - ) - } else { - df$tmp_id <- - sprintf( - "%s_%s", - df$region_code, df$reporting_year - ) - } - - # Apply censoring - out <- censor_stats(df, censored[[type]]) - out$tmp_id <- NULL - - return(out) -} - -#' Censor stats -#' @param df data.table: Table to censor. -#' @param censored_table data.table: Censor table -#' @noRd -censor_stats <- function(df, censored_table) { - - df$to_remove <- FALSE - if (any(df$tmp_id %in% censored_table$id)) { - for (i in seq_len(nrow(df))) { - for (y in seq_len(nrow(censored_table))) { - if (df$tmp_id[i] == censored_table$id[y]) { - # Remove entire row if all statistics should be removed - if (censored_table$statistic[y] == "all") { - df$to_remove[i] <- TRUE - } else { - # Otherwise set specific stats to NA - df[[censored_table$statistic[y]]][i] <- NA_real_ - } - } - } - } - } - df <- df[!df$to_remove] - df$to_remove <- NULL - - return(df) -} - -#' Create query controls -#' @param syv_lkup data.table: Survey lkup table -#' @param ref_lkup data.table: Reference lkup table -#' @param aux_files data.table: All valid regions and corresponding population -#' @param aux_tables character: List of available aux tables -#' @param versions character: List of available data versions -#' @return list -#' @noRd -create_query_controls <- function(svy_lkup, - ref_lkup, - aux_files, - aux_tables, - versions) { - # Countries and regions - countries <- unique(c( - svy_lkup$country_code, - ref_lkup$country_code - )) - - regions <- unique(c( - aux_files$regions$region_code - )) - - country <- list( - values = c( - "ALL", - sort(c( - countries, - regions) - ) - ), - type = "character" - ) - - region <- list( - values = sort(c("ALL", regions)), - type = "character" - ) - # Year - year <- list( - values = c( - "all", "MRV", - sort(unique(c( - svy_lkup$reporting_year, - ref_lkup$reporting_year - ))) - ), - type = "character" - ) - # Poverty line - povline <- list( - values = c(min = 0, max = 2700), - type = "numeric" - ) - # Popshare - popshare <- list( - values = c(min = 0, max = 1), - type = "numeric" - ) - - # Boolean parameters - fill_gaps <- - aggregate <- - long_format <- - additional_ind <- - list(values = c(TRUE, FALSE), - type = "logical") - - # Group by - group_by <- list( - values = c("none", "wb"), - type = "character" - ) - - # Welfare type - welfare_type <- list( - values = c("all", sort(unique(c( - svy_lkup$welfare_type, - ref_lkup$welfare_type - )))), - type = "character" - ) - # Reporting level - reporting_level <- list( - values = c( - "all", - sort(unique(c( - svy_lkup$reporting_level, - ref_lkup$reporting_level - ))) - ), - type = "character" - ) - # PPPs - ppp <- list( - values = c(min = 0.05, max = 1000000), # CHECK THE VALUE OF MAX - type = "numeric" - ) - # Versions - version <- list( - values = versions, - type = "character" - ) - # Formats - format <- list(values = c("json", "csv", "rds", "arrow"), - type = "character") - # Tables - table <- list(values = aux_tables, type = "character") - # parameters - parameter <- - list(values = c("country", "year", "povline", - "popshare", "fill_gaps", "aggregate", - "group_by", "welfare_type", - "reporting_level", "ppp", "version", - "format", "table", "long_format"), - type = "character") - # Endpoint - endpoint <- - list(values = c("all", - "aux", - "pip", - "pip-grp", - "pip-info", - "valid-params"), - type = "character") - - # Create list of query controls - query_controls <- list( - country = country, - region = region, - year = year, - povline = povline, - popshare = popshare, - fill_gaps = fill_gaps, - aggregate = aggregate, - long_format = long_format, - additional_ind = additional_ind, - group_by = group_by, - welfare_type = welfare_type, - reporting_level = reporting_level, - ppp = ppp, - version = version, - format = format, - table = table, - parameter = parameter, - endpoint = endpoint - ) - - return(query_controls) -} - -convert_empty <- function(string) { - if (string == "") { - "-" - } else { - string - } -} - - -#' Subset country-years table -#' This is a table created at start time to facilitate imputations -#' It part of the interpolated_list object -#' @param valid_regions character: List of valid region codes that can be used -#' @return data.frame -#' @keywords internal -subset_ctry_years <- function(country, - year, - lkup, - valid_regions) { - - keep <- TRUE - # Select data files based on requested country, year, etc. - # Select countries - if (!any(c("ALL", "WLD") %in% country)) { - # Select regions - if (any(country %in% valid_regions)) { - selected_regions <- country[country %in% valid_regions] - keep_regions <- lkup$region_code %in% selected_regions - } else { - keep_regions <- rep(FALSE, length(lkup$country_code)) - } - keep_countries <- lkup$country_code %chin% country - keep <- keep & (keep_countries | keep_regions) - } - - # if (!all(country %in% c("all", valid_regions))) { - # keep <- keep & lkup$country_code %in% country - # } - - # Select years - if (year[1] == "MRV") { - if (country[1] != "ALL") { - max_year <- max(lkup[country_code == country]$reporting_year) - } else { - max_year <- max(lkup$reporting_year) - } - keep <- keep & lkup$reporting_year %in% max_year - } - if (!year[1] %in% c("ALL", "MRV")) { - keep <- keep & lkup$reporting_year %in% as.numeric(year) - } - - lkup <- as.data.frame(lkup) - lkup <- lkup[keep, ] - - return(lkup) -} - -#' Clear cache -#' Clear cache directory if available -#' @param cd A `cachem::cache_disk()` object -#' @return list -#' @keywords internal -clear_cache <- function(cd) { - tryCatch({ - if (cd$size() > 0) { - cd$reset() - n <- cd$size() - if (n == 0) { - out <- list(status = 'success', msg = 'Cache cleared.') - } else { - out <- list(status = 'error', msg = sprintf('Something went wrong. %n items remain in cache.', n)) - } - } else { - out <- list(status = 'success', msg = 'Cache directory is empty. Nothing to clear.') - } - return(out) - }, error = function(e){ - out <- list(status = 'error', msg = 'Cache directory not found.') - return(out) - }) -} - -#' select_country -#' Helper function for subset_lkup() -#' @inheritParams subset_lkup -#' @param keep logical vector -#' @return logical vector -select_country <- function(lkup, keep, country, valid_regions) { - # Select data files based on requested country, year, etc. - # Select countries - if (!any(c("ALL", "WLD") %in% toupper(country))) { - # Select regions - if (any(country %in% valid_regions)) { - selected_regions <- country[country %in% valid_regions] - keep_regions <- lkup$region_code %in% selected_regions - } else { - keep_regions <- rep(FALSE, length(lkup$country_code)) - } - keep_countries <- lkup$country_code %in% country - keep <- keep & (keep_countries | keep_regions) - } - return(keep) -} - -#' select_years -#' Helper function for subset_lkup() -#' @inheritParams subset_lkup -#' @param keep logical vector -#' @return logical vector -select_years <- function(lkup, keep, year, country) { - # columns i is an ID that identifies if a country has more than one - # observation for reporting year. That is the case of IND with URB/RUR and ZWE - # with interporaltion and microdata info - # dtmp <- ref_lkup[, - # .i := seq_len(.N), - # by = .(country_code, reporting_year)] - - dtmp <- lkup - - year <- toupper(year) - country <- toupper(country) - keep_years <- rep(TRUE, nrow(dtmp)) - # STEP 1 - If Most Recent Value requested - if ("MRV" %in% year) { - # STEP 1.1 - If all countries selected. Select MRV for each country - dtmp <- - if (any(c("ALL", "WLD") %in% country)) { - # the i == 1 conditions ensures that it takes into account only one - # observation per country per reporting year. This has to bee like - # that in order to keep the same length as the `keep_years` vector. - # dtmp[, - # max_year := reporting_year == max(reporting_year) & i == 1, - # by = country_code] - - dtmp[, - max_year := reporting_year == max(reporting_year), - by = country_code] - - } else { - # STEP 1.2 - If only some countries selected. Select MRV for each selected - # country - dtmp[dtmp[["country_code"]] %in% country, - max_year := reporting_year == max(reporting_year), - by = country_code] - } - - # dtmp <- unique(dtmp[, .(country_code, reporting_year, max_year)]) - - - keep_years <- keep_years & as.logical(dtmp[["max_year"]]) - - } - # STEP 2 - If specific years are specified. Filter for these years - if (!any(c("ALL", "MRV") %in% year)) { - keep_years <- keep_years & dtmp$reporting_year %in% as.numeric(year) - - } - - # STEP 3 - Otherwise return all years - keep <- keep & keep_years - return(keep) -} - - - -#' Test whether a vector is length zero and IS not NULL -#' -#' @param x -#' -#' @return logical. TRUE if x is empty but it is not NULL -#' @export -#' -#' @examples -#' x <- vector() -#' is_empty(x) -#' -#' y <- NULL -#' length(y) -#' is_empty(y) -is_empty <- function(x) { - if (length(x) == 0 & !is.null(x)) { - TRUE - } else { - FALSE - } -} - - - - -#' Populate list in parent frame -#' -#' Fill in maned objects of a list with the value of named objects in the -#' parent frame in which the list has been created. This objects must have the -#' same names as the objects of the list -#' -#' @param l list to populate with names objects -#' @param assign logical: whether to assign to parent frame -#' -#' @return invisible list `l` populated with objects of the same frame -#' @export -#' -#' @examples -#' l <- list(x = NULL, -#' y = NULL, -#' z = NULL) -#' -#' x <- 2 -#' y <- "f" -#' z <- TRUE -#' fillin_list(l) -#' l -fillin_list <- function(l, - assign = TRUE) { - - # ____________________________________________________________ - # Defenses #### - stopifnot( exprs = { - is.list(l) - !is.data.frame(l) - } - ) - - # __________________________________________________________________ - # Early returns #### - if (FALSE) { - return() - } - - # _______________________________________________________________ - # Computations #### - # name of the list in parent frame - nm_l = deparse(substitute(l)) - - #n names of the objects of the list - nm_obj <- names(l) - - # all the objects in parent frame - obj_in_parent <- ls(envir = parent.frame()) - - # make sure that all the objects in list are in parent frame - if (!all(nm_obj %in% obj_in_parent)) { - - non_in_parent <-nm_obj[!nm_obj %in% obj_in_parent] - - stop_msg <- paste("The following objects are not in calling function: \n", - paste(non_in_parent, collapse = ", ")) - - stop(stop_msg) - } - - val_obj <- lapply(nm_obj, get, envir = parent.frame()) - names(val_obj) <- nm_obj - - for (i in seq_along(nm_obj)) { - x <- val_obj[[nm_obj[i]]] - if (!is_empty(x)) { - l[[nm_obj[i]]] <- x - } - } - - if (assign == TRUE) { - assign(nm_l, l, envir = parent.frame()) - } - - # ________________________________________________ - # Return #### - return(invisible(l)) - - # x = get(x_name, envir = parent.frame()) - # x_name = deparse(substitute(x)) -} - -#' Returns all auxiliary tables that support the long_format=TRUE parameter -#' @return character vector -#' @export - -get_valid_aux_long_format_tables <- function() { - c('cpi', 'ppp', 'gdp', 'pce', 'pop') -} - - - +utils::globalVariables( + c( + ".", "cache_id", "country_code", "cpi", "decile1", + "decile10", "decile2", "decile3", "decile4", + "decile5", "decile6", "decile7", "decile8", + "decile9", "distribution_type", "gini", + "headcount", "interpolation_id", + "is_interpolated", "median", "mld", + "polarization", "pop", "reporting_level", + "pop_in_poverty", "poverty_gap", + "poverty_line", "poverty_severity", + "ppp", "region_code", "reporting_pop", + "reporting_year", "survey_comparability", + "survey_coverage", "survey_mean_lcu", + "survey_mean_ppp", "survey_year", "watts", + "wb_region_code", "weighted.mean", + "welfare_type", "pcn_region_code", + "comparable_spell","..cols", "N", "check", + "data_interpolation_id", "display_cp", "region_name", + "sessionInfo" + ) +) + + +#' Subset look-up data +#' @inheritParams pip +#' @param valid_regions character: List of valid region codes that can be used +#' for region selection +#' @return data.frame +#' @keywords internal +subset_lkup <- function(country, + year, + welfare_type, + reporting_level, + lkup, + valid_regions) { + + # STEP 1 - Keep every row by default + keep <- rep(TRUE, nrow(lkup)) + # STEP 2 - Select countries + keep <- select_country(lkup, keep, country, valid_regions) + # STEP 3 - Select years + keep <- select_years(lkup, keep, year, country) + # STEP 4 - Select welfare_type + if (welfare_type[1] != "all") { + keep <- keep & lkup$welfare_type == welfare_type + } + # STEP 5 - Select reporting_level + keep <- select_reporting_level(lkup = lkup, + keep = keep, + reporting_level = reporting_level[1]) + + lkup <- lkup[keep, ] + + return(lkup) +} + +#' Helper to filter metadata +#' aggregate distribution need to be filtered out when popshare is not null +#' This is a temporary function until a full fix is implemented, and popshare is +#' supported for all distributions +#' +#' @param metadata data.frame: Output of `subset_lkup()` +#' @param popshare numeric: popshare value passed to `pip()` +#' +#' @return data.frame + +filter_lkup <- function(metadata, + popshare) { + # popshare option not supported for aggregate distributions + if (!is.null(popshare)) { + return( + metadata[metadata$distribution_type != "aggregate", ] + ) + } else { + return(metadata) + } + +} + + +#' helper function to correctly filter look up table according to requested +#' reporting level +#' +#' @param lkup data.table: Main lookup table +#' @param keep logical: Logical vector of rows to be kept +#' @param reporting_level character: Requested reporting level +#' +#' @return data.table +#' @export +#' +select_reporting_level <- function(lkup, + keep, + reporting_level) { + # To be updated: Fix the coverage variable names in aux data (reporting_coverage?) + if (reporting_level == "all") { + return(keep) + + } else if (reporting_level == "national") { + # Subnational levels necessary to compute national stats for aggregate distributions + keep <- keep & (lkup$reporting_level == reporting_level | lkup$is_used_for_aggregation) + return(keep) + + } else { + if ("survey_coverage" %in% names(lkup)) { + keep <- keep & + (lkup$survey_coverage == reporting_level | + lkup$reporting_level == reporting_level) + } else { + # This condition is not triggered + keep <- keep & lkup$reporting_level == reporting_level + } + return(keep) + } +} + + +#' Read survey data +#' +#' @param svy_id character: Survey ID +#' @param reporting_level character: geographical reporting level +#' @param path character: Path to survey data +#' +#' @return data.frame +#' @keywords internal +get_svy_data <- function(svy_id, + reporting_level, + path) { + # Each call should be made at a unique reporting_level (equivalent to reporting_data_level: national, urban, rural) + # This check should be conducted at the data validation stage + reporting_level <- unique(reporting_level) + assertthat::assert_that(length(reporting_level) == 1, + msg = "Problem with input data: Multiple reporting_levels" + ) + # tictoc::tic("read_single") + out <- lapply(path, function(x) { + + if (reporting_level %in% c("urban", "rural")) { # Not robust. Should not be hard coded here. + tmp <- fst::read_fst(x) + tmp <- tmp[tmp$area == reporting_level, ] + tmp <- tmp[, c("welfare", "weight")] + } else { + tmp <- fst::read_fst(x, columns = c("welfare", "weight")) + } + + return(tmp) + }) + + # Logging + # end_read_single <- tictoc::toc(quiet = TRUE) + # logger::log_info('read_single: {svy_id} {round(end_read_single$toc - end_read_single$tic, digits = getOption("digits", 6))}') + + names_out <- sprintf( + "df%s", + seq_along(svy_id) - 1 + ) + names(out) <- names_out + + return(out) +} + + +#' Add pre-computed distributional stats +#' +#' @param df data.table: Data frame of poverty statistics +#' @param dist_stats data.table: Distributional stats lookup +#' +#' @return data.table +#' @export +#' +add_dist_stats <- function(df, dist_stats) { + # Keep only relevant columns + cols <- c( + "cache_id", + # "country_code", + # "reporting_year", + # "welfare_type", + "reporting_level", + "gini", + "polarization", + "mld", + sprintf("decile%s", 1:10) + ) + dist_stats <- dist_stats[, .SD, .SDcols = cols] + + # merge dist stats with main table + # data.table::setnames(dist_stats, "survey_median_ppp", "median") + + df <- dist_stats[df, + on = .(cache_id, reporting_level), #.(country_code, reporting_year, welfare_type, reporting_level), + allow.cartesian = TRUE + ] + + return(df) +} + +#' Collapse rows +#' @return data.table +#' @noRd +collapse_rows <- function(df, vars, na_var = NULL) { + tmp_vars <- lapply(df[, .SD, .SDcols = vars], unique, collapse = "|") + tmp_vars <- lapply(tmp_vars, paste, collapse = "|") + tmp_var_names <- names(df[, .SD, .SDcols = vars]) + + if (!is.null(na_var)) df[[na_var]] <- NA_real_ + + for (tmp_var in seq_along(tmp_vars)) { + df[[tmp_var_names[tmp_var]]] <- tmp_vars[[tmp_var]] + } + + df <- unique(df) + return(df) +} + +#' Censor rows +#' Censor statistics based on a pre-defined censor table. +#' @param df data.table: Table to censor. Output from `pip()`. +#' @param censored list: List with censor tables. +#' @param type character: Type of censor table to use. Either countries or regions. +#' @return data.table +#' @noRd +censor_rows <- function(df, censored, type = c("countries", "regions")) { + + type <- match.arg(type) + + # Return early if there are no censoring observations + # if (nrow(censored[[type]]) == 0) { + # return(df) + # } + + # Create tmp_id to match with censor table + if (type == "countries") { + df$tmp_id <- + sprintf( + "%s_%s_%s_%s_%s", + df$country_code, df$reporting_year, + df$survey_acronym, df$welfare_type, + df$reporting_level + ) + } else { + df$tmp_id <- + sprintf( + "%s_%s", + df$region_code, df$reporting_year + ) + } + + # Apply censoring + out <- censor_stats(df, censored[[type]]) + out$tmp_id <- NULL + + return(out) +} + +#' Censor stats +#' @param df data.table: Table to censor. +#' @param censored_table data.table: Censor table +#' @noRd +censor_stats <- function(df, censored_table) { + + df$to_remove <- FALSE + if (any(df$tmp_id %in% censored_table$id)) { + for (i in seq_len(nrow(df))) { + for (y in seq_len(nrow(censored_table))) { + if (df$tmp_id[i] == censored_table$id[y]) { + # Remove entire row if all statistics should be removed + if (censored_table$statistic[y] == "all") { + df$to_remove[i] <- TRUE + } else { + # Otherwise set specific stats to NA + df[[censored_table$statistic[y]]][i] <- NA_real_ + } + } + } + } + } + df <- df[!df$to_remove] + df$to_remove <- NULL + + return(df) +} + +#' Create query controls +#' @param syv_lkup data.table: Survey lkup table +#' @param ref_lkup data.table: Reference lkup table +#' @param aux_files data.table: All valid regions and corresponding population +#' @param aux_tables character: List of available aux tables +#' @param versions character: List of available data versions +#' @return list +#' @noRd +create_query_controls <- function(svy_lkup, + ref_lkup, + aux_files, + aux_tables, + versions) { + # Countries and regions + countries <- unique(c( + svy_lkup$country_code, + ref_lkup$country_code + )) + + regions <- unique(c( + aux_files$regions$region_code + )) + + country <- list( + values = c( + "ALL", + sort(c( + countries, + regions) + ) + ), + type = "character" + ) + + region <- list( + values = sort(c("ALL", regions)), + type = "character" + ) + # Year + year <- list( + values = c( + "all", "MRV", + sort(unique(c( + svy_lkup$reporting_year, + ref_lkup$reporting_year + ))) + ), + type = "character" + ) + # Poverty line + povline <- list( + values = c(min = 0, max = 2700), + type = "numeric" + ) + # Popshare + popshare <- list( + values = c(min = 0, max = 1), + type = "numeric" + ) + + # Boolean parameters + fill_gaps <- + aggregate <- + long_format <- + additional_ind <- + list(values = c(TRUE, FALSE), + type = "logical") + + # Group by + group_by <- list( + values = c("none", "wb"), + type = "character" + ) + + # Welfare type + welfare_type <- list( + values = c("all", sort(unique(c( + svy_lkup$welfare_type, + ref_lkup$welfare_type + )))), + type = "character" + ) + # Reporting level + reporting_level <- list( + values = c( + "all", + sort(unique(c( + svy_lkup$reporting_level, + ref_lkup$reporting_level + ))) + ), + type = "character" + ) + # PPPs + ppp <- list( + values = c(min = 0.05, max = 1000000), # CHECK THE VALUE OF MAX + type = "numeric" + ) + # Versions + version <- list( + values = versions, + type = "character" + ) + # Formats + format <- list(values = c("json", "csv", "rds", "arrow"), + type = "character") + # Tables + table <- list(values = aux_tables, type = "character") + # parameters + parameter <- + list(values = c("country", "year", "povline", + "popshare", "fill_gaps", "aggregate", + "group_by", "welfare_type", + "reporting_level", "ppp", "version", + "format", "table", "long_format"), + type = "character") + # Endpoint + endpoint <- + list(values = c("all", + "aux", + "pip", + "pip-grp", + "pip-info", + "valid-params"), + type = "character") + + # Create list of query controls + query_controls <- list( + country = country, + region = region, + year = year, + povline = povline, + popshare = popshare, + fill_gaps = fill_gaps, + aggregate = aggregate, + long_format = long_format, + additional_ind = additional_ind, + group_by = group_by, + welfare_type = welfare_type, + reporting_level = reporting_level, + ppp = ppp, + version = version, + format = format, + table = table, + parameter = parameter, + endpoint = endpoint + ) + + return(query_controls) +} + +convert_empty <- function(string) { + if (string == "") { + "-" + } else { + string + } +} + + +#' Subset country-years table +#' This is a table created at start time to facilitate imputations +#' It part of the interpolated_list object +#' @param valid_regions character: List of valid region codes that can be used +#' @return data.frame +#' @keywords internal +subset_ctry_years <- function(country, + year, + lkup, + valid_regions) { + + keep <- TRUE + # Select data files based on requested country, year, etc. + # Select countries + if (!any(c("ALL", "WLD") %in% country)) { + # Select regions + if (any(country %in% valid_regions)) { + selected_regions <- country[country %in% valid_regions] + keep_regions <- lkup$region_code %in% selected_regions + } else { + keep_regions <- rep(FALSE, length(lkup$country_code)) + } + keep_countries <- lkup$country_code %chin% country + keep <- keep & (keep_countries | keep_regions) + } + + # if (!all(country %in% c("all", valid_regions))) { + # keep <- keep & lkup$country_code %in% country + # } + + # Select years + if (year[1] == "MRV") { + if (country[1] != "ALL") { + max_year <- max(lkup[country_code == country]$reporting_year) + } else { + max_year <- max(lkup$reporting_year) + } + keep <- keep & lkup$reporting_year %in% max_year + } + if (!year[1] %in% c("ALL", "MRV")) { + keep <- keep & lkup$reporting_year %in% as.numeric(year) + } + + lkup <- as.data.frame(lkup) + lkup <- lkup[keep, ] + + return(lkup) +} + +#' Clear cache +#' Clear cache directory if available +#' @param cd A `cachem::cache_disk()` object +#' @return list +#' @keywords internal +clear_cache <- function(cd) { + tryCatch({ + if (cd$size() > 0) { + cd$reset() + n <- cd$size() + if (n == 0) { + out <- list(status = 'success', msg = 'Cache cleared.') + } else { + out <- list(status = 'error', msg = sprintf('Something went wrong. %n items remain in cache.', n)) + } + } else { + out <- list(status = 'success', msg = 'Cache directory is empty. Nothing to clear.') + } + return(out) + }, error = function(e){ + out <- list(status = 'error', msg = 'Cache directory not found.') + return(out) + }) +} + +#' select_country +#' Helper function for subset_lkup() +#' @inheritParams subset_lkup +#' @param keep logical vector +#' @return logical vector +select_country <- function(lkup, keep, country, valid_regions) { + # Select data files based on requested country, year, etc. + # Select countries + if (!any(c("ALL", "WLD") %in% toupper(country))) { + # Select regions + if (any(country %in% valid_regions)) { + selected_regions <- country[country %in% valid_regions] + keep_regions <- lkup$region_code %in% selected_regions + } else { + keep_regions <- rep(FALSE, length(lkup$country_code)) + } + keep_countries <- lkup$country_code %in% country + keep <- keep & (keep_countries | keep_regions) + } + return(keep) +} + +#' select_years +#' Helper function for subset_lkup() +#' @inheritParams subset_lkup +#' @param keep logical vector +#' @return logical vector +select_years <- function(lkup, keep, year, country) { + # columns i is an ID that identifies if a country has more than one + # observation for reporting year. That is the case of IND with URB/RUR and ZWE + # with interporaltion and microdata info + # dtmp <- ref_lkup[, + # .i := seq_len(.N), + # by = .(country_code, reporting_year)] + + dtmp <- lkup + + year <- toupper(year) + country <- toupper(country) + keep_years <- rep(TRUE, nrow(dtmp)) + # STEP 1 - If Most Recent Value requested + if ("MRV" %in% year) { + # STEP 1.1 - If all countries selected. Select MRV for each country + dtmp <- + if (any(c("ALL", "WLD") %in% country)) { + # the i == 1 conditions ensures that it takes into account only one + # observation per country per reporting year. This has to bee like + # that in order to keep the same length as the `keep_years` vector. + # dtmp[, + # max_year := reporting_year == max(reporting_year) & i == 1, + # by = country_code] + + dtmp[, + max_year := reporting_year == max(reporting_year), + by = country_code] + + } else { + # STEP 1.2 - If only some countries selected. Select MRV for each selected + # country + dtmp[dtmp[["country_code"]] %in% country, + max_year := reporting_year == max(reporting_year), + by = country_code] + } + + # dtmp <- unique(dtmp[, .(country_code, reporting_year, max_year)]) + + + keep_years <- keep_years & as.logical(dtmp[["max_year"]]) + + } + # STEP 2 - If specific years are specified. Filter for these years + if (!any(c("ALL", "MRV") %in% year)) { + keep_years <- keep_years & dtmp$reporting_year %in% as.numeric(year) + + } + + # STEP 3 - Otherwise return all years + keep <- keep & keep_years + return(keep) +} + + + +#' Test whether a vector is length zero and IS not NULL +#' +#' @param x +#' +#' @return logical. TRUE if x is empty but it is not NULL +#' @export +#' +#' @examples +#' x <- vector() +#' is_empty(x) +#' +#' y <- NULL +#' length(y) +#' is_empty(y) +is_empty <- function(x) { + if (length(x) == 0 & !is.null(x)) { + TRUE + } else { + FALSE + } +} + + + + +#' Populate list in parent frame +#' +#' Fill in maned objects of a list with the value of named objects in the +#' parent frame in which the list has been created. This objects must have the +#' same names as the objects of the list +#' +#' @param l list to populate with names objects +#' @param assign logical: whether to assign to parent frame +#' +#' @return invisible list `l` populated with objects of the same frame +#' @export +#' +#' @examples +#' l <- list(x = NULL, +#' y = NULL, +#' z = NULL) +#' +#' x <- 2 +#' y <- "f" +#' z <- TRUE +#' fillin_list(l) +#' l +fillin_list <- function(l, + assign = TRUE) { + + # ____________________________________________________________ + # Defenses #### + stopifnot( exprs = { + is.list(l) + !is.data.frame(l) + } + ) + + # __________________________________________________________________ + # Early returns #### + if (FALSE) { + return() + } + + # _______________________________________________________________ + # Computations #### + # name of the list in parent frame + nm_l = deparse(substitute(l)) + + #n names of the objects of the list + nm_obj <- names(l) + + # all the objects in parent frame + obj_in_parent <- ls(envir = parent.frame()) + + # make sure that all the objects in list are in parent frame + if (!all(nm_obj %in% obj_in_parent)) { + + non_in_parent <-nm_obj[!nm_obj %in% obj_in_parent] + + stop_msg <- paste("The following objects are not in calling function: \n", + paste(non_in_parent, collapse = ", ")) + + stop(stop_msg) + } + + val_obj <- lapply(nm_obj, get, envir = parent.frame()) + names(val_obj) <- nm_obj + + for (i in seq_along(nm_obj)) { + x <- val_obj[[nm_obj[i]]] + if (!is_empty(x)) { + l[[nm_obj[i]]] <- x + } + } + + if (assign == TRUE) { + assign(nm_l, l, envir = parent.frame()) + } + + return(invisible(l)) + +} + +#' Returns all auxiliary tables that support the long_format=TRUE parameter +#' @return character vector +#' @export + +get_valid_aux_long_format_tables <- function() { + c('cpi', 'ppp', 'gdp', 'pce', 'pop') +} + + +#' load SPR table from aux data +#' +#' If there is no data available, return an empty data.frame +#' +#' @inheritParams get_aux_table +#' +#' @return data.table +get_spr_table <- function(data_dir, + table = c("spr_svy", "spr_lnp")) { + + table <- match.arg(table) + + spr <- + tryCatch( + expr = { + # Your code... + get_aux_table(data_dir = data_dir, + table = table) + }, # end of expr section + error = function(e) { + data.table::data.table( + country_code = character(0), + reporting_year = numeric(0), + welfare_type = character(0), + reporting_level = character(0), + spl = numeric(0), + spr = numeric(0), + median = numeric(0) + ) + } + ) # End of trycatch + return(spr) +} + + + + + + +#' Add SPL indicators to either fg* or rg PIP output +#' +#' @param df data frame inside [fg_pip] or [rg_pip] +#' @param data_dir character: Directory path of auxiliary data. Usually +#' `lkup$data_root` +#' @inheritParams pip +#' +#' @return data.table +add_spl <- function(df, fill_gaps, data_dir) { + + if (fill_gaps) { + spl <- + get_spr_table(data_dir = data_dir, + table = "spr_lnp") + + out <- merge.data.table( + x = df, + y = spl, + by = c( + "country_code", + "reporting_year", + "welfare_type", + "reporting_level" + ), + all.x = TRUE + ) + + } else { + # Add SPL ------------ + spl <- + get_spr_table(data_dir = data_dir, + table = "spr_svy") + + # Remove median from survey file and use the one from wbpip:::prod_compute_pip_stats + spl[, median := NULL] + + out <- merge.data.table( + x = df, + y = spl, + by = c( + "country_code", + "reporting_year", + "welfare_type", + "reporting_level" + ), + all.x = TRUE + ) + } + + return(out) +} + + + + + +#' Add Aggregate medians +#' +#' @param df data frame from either [fg_pip] or [rg_pip] +#' @param data_dir character: Directory path of auxiliary data. Usually +#' `lkup$data_root` +#' @inheritParams pip +#' +#' @return data.table +add_agg_medians <- function(df, fill_gaps, data_dir) { + + # Remove Get only obs with median == NA -------- + dtn <- df[is.na(median)] # NAs + dtn[, median := NULL] + + dtm <- df[!is.na(median)] # no NAs + + + ## early returns ----------- + if (nrow(dtn) == 0) { + return(df) + } + + + # Get medians from SPL data ----------- + if (fill_gaps) { + med <- + get_spr_table(data_dir = data_dir, + table = "spr_lnp") + + } else { + med <- + get_spr_table(data_dir = data_dir, + table = "spr_svy") + } + + med <- med |> + collapse::get_vars(c( + "country_code", + "reporting_year", + "welfare_type", + "reporting_level", + "median" + )) + + # join medians to missing data --------- + dtnm <- merge.data.table( # joined medians + x = dtn, + y = med, + by = c( + "country_code", + "reporting_year", + "welfare_type", + "reporting_level" + ), + all.x = TRUE + ) + + # append ------ + out <- data.table::rbindlist(list(dtnm, dtm), + use.names = TRUE, + fill = TRUE) + + + return(out) +} diff --git a/data-raw/lkup.R b/data-raw/lkup.R index 4d513a89..1d84f8f2 100644 --- a/data-raw/lkup.R +++ b/data-raw/lkup.R @@ -98,20 +98,20 @@ create_fake_data <- function(ver, pr = 0.01 ) { } - -vers <- c("20220810_2017_01_02_TEST", "20220609_2011_02_02_PROD", "20220810_2017_01_02_PROD") -purrr::map(vers, create_fake_data) - - -# ver <- "20220810_2017_01_02_PROD" -data_dir <- Sys.getenv("PIPAPI_FAKEDATA_FOLDER") -lkups <- create_versioned_lkups(data_dir = data_dir, - vintage_pattern = "PROD$") - -lkup <- lkups$versions_paths[[lkups$latest_release]] - -usethis::use_data(lkup, overwrite = TRUE) - -saveRDS(lkup, "tests/testthat/testdata/lkup.rds") - - +# +# vers <- c("20220810_2017_01_02_TEST", "20220609_2011_02_02_PROD", "20220810_2017_01_02_PROD") +# purrr::map(vers, create_fake_data) +# +# +# # ver <- "20220810_2017_01_02_PROD" +# data_dir <- Sys.getenv("PIPAPI_FAKEDATA_FOLDER") +# lkups <- create_versioned_lkups(data_dir = data_dir, +# vintage_pattern = "PROD$") +# +# lkup <- lkups$versions_paths[[lkups$latest_release]] +# +# usethis::use_data(lkup, overwrite = TRUE) +# +# saveRDS(lkup, "tests/testthat/testdata/lkup.rds") +# +# diff --git a/data/empty_response.rda b/data/empty_response.rda index 226653cb..5edea01d 100644 Binary files a/data/empty_response.rda and b/data/empty_response.rda differ diff --git a/data/empty_response_grp.rda b/data/empty_response_grp.rda index 8530f1f6..a96c8c7f 100644 Binary files a/data/empty_response_grp.rda and b/data/empty_response_grp.rda differ diff --git a/inst/plumber/v1/endpoints.R b/inst/plumber/v1/endpoints.R index d5d09c42..be3ea64e 100644 --- a/inst/plumber/v1/endpoints.R +++ b/inst/plumber/v1/endpoints.R @@ -165,7 +165,8 @@ function(req, res) { # "max-age=172800") res$setHeader("ETag", - pipapi::create_etag_header(req)) + pipapi::create_etag_header(req, + lkups = lkups)) plumber::forward() @@ -531,7 +532,7 @@ function(req) { params <- req$argsQuery params$lkup <- lkups$versions_paths[[req$argsQuery$version]] params$version <- NULL - do.call(pipapi:::ui_hp_countries, params) + do.call(pipapi::ui_hp_countries, params) } diff --git a/man/add_agg_medians.Rd b/man/add_agg_medians.Rd new file mode 100644 index 00000000..41a66148 --- /dev/null +++ b/man/add_agg_medians.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{add_agg_medians} +\alias{add_agg_medians} +\title{Add Aggregate medians} +\usage{ +add_agg_medians(df, fill_gaps, data_dir) +} +\arguments{ +\item{df}{data frame from either \link{fg_pip} or \link{rg_pip}} + +\item{fill_gaps}{logical: If set to TRUE, will interpolate / extrapolate +values for missing years} + +\item{data_dir}{character: Directory path of auxiliary data. Usually +\code{lkup$data_root}} +} +\value{ +data.table +} +\description{ +Add Aggregate medians +} diff --git a/man/add_spl.Rd b/man/add_spl.Rd new file mode 100644 index 00000000..29ce9751 --- /dev/null +++ b/man/add_spl.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{add_spl} +\alias{add_spl} +\title{Add SPL indicators to either fg* or rg PIP output} +\usage{ +add_spl(df, fill_gaps, data_dir) +} +\arguments{ +\item{df}{data frame inside \link{fg_pip} or \link{rg_pip}} + +\item{fill_gaps}{logical: If set to TRUE, will interpolate / extrapolate +values for missing years} + +\item{data_dir}{character: Directory path of auxiliary data. Usually +\code{lkup$data_root}} +} +\value{ +data.table +} +\description{ +Add SPL indicators to either fg* or rg PIP output +} diff --git a/man/create_etag_header.Rd b/man/create_etag_header.Rd index 9aa82420..ebbea9c6 100644 --- a/man/create_etag_header.Rd +++ b/man/create_etag_header.Rd @@ -4,10 +4,12 @@ \alias{create_etag_header} \title{create_etag_header} \usage{ -create_etag_header(req) +create_etag_header(req, lkups) } \arguments{ \item{req}{R6 object: Plumber API request} + +\item{lkups}{list: pipapi master lkups} } \value{ character diff --git a/man/empty_response.Rd b/man/empty_response.Rd index 0d87930d..fd71f784 100644 --- a/man/empty_response.Rd +++ b/man/empty_response.Rd @@ -5,7 +5,7 @@ \alias{empty_response} \title{Empty response schema} \format{ -An object of class \code{data.table} (inherits from \code{data.frame}) with 0 rows and 40 columns. +An object of class \code{data.table} (inherits from \code{data.frame}) with 0 rows and 42 columns. } \usage{ empty_response diff --git a/man/get_pip_version.Rd b/man/get_pip_version.Rd index fcea50d2..99099133 100644 --- a/man/get_pip_version.Rd +++ b/man/get_pip_version.Rd @@ -4,10 +4,7 @@ \alias{get_pip_version} \title{Return the versions of the pip packages used for computations} \usage{ -get_pip_version( - pip_packages = c("pipapi", "wbpip"), - data_versions = lkups$versions -) +get_pip_version(pip_packages = c("pipapi", "wbpip"), data_versions) } \arguments{ \item{pip_packages}{character: Custom packages powering the API} diff --git a/man/get_spr_table.Rd b/man/get_spr_table.Rd new file mode 100644 index 00000000..3ac71ec6 --- /dev/null +++ b/man/get_spr_table.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_spr_table} +\alias{get_spr_table} +\title{load SPR table from aux data} +\usage{ +get_spr_table(data_dir, table = c("spr_svy", "spr_lnp")) +} +\arguments{ +\item{data_dir}{character: Data directory} + +\item{table}{character: Name of auxiliary table} +} +\value{ +data.table +} +\description{ +If there is no data available, return an empty data.frame +} diff --git a/tests/testthat/test-clear_cache.R b/tests/testthat/test-clear_cache.R index dbc252a1..705946bd 100644 --- a/tests/testthat/test-clear_cache.R +++ b/tests/testthat/test-clear_cache.R @@ -1,5 +1,6 @@ # Tests depend on PIPAPI_DATA_ROOT_FOLDER_LOCAL. Skip if not found. -skip_if(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL") == "") +skip_if(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL") == "" || + Sys.getenv("PIPAPI_APPLY_CACHING") != TRUE) # lkups <- create_versioned_lkups(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL")) diff --git a/tests/testthat/test-create_lkups.R b/tests/testthat/test-create_lkups.R index fe91b519..3f455ae7 100644 --- a/tests/testthat/test-create_lkups.R +++ b/tests/testthat/test-create_lkups.R @@ -102,3 +102,11 @@ test_that("sort_versions correctly orders available versions", { expect_equal(out, expected_sorted_versions) }) + +# test_that("all objects are correctly passed and used", { +# root <- rprojroot::is_r_package +# tmp <- lintr::lint(filename = root$find_file("R/create_lkups.R"), +# linters = lintr::object_usage_linter()) +# +# expect_equal(length(tmp), 0) +# }) diff --git a/tests/testthat/test-fg_pip-local.R b/tests/testthat/test-fg_pip-local.R index 0476f040..925b13d9 100644 --- a/tests/testthat/test-fg_pip-local.R +++ b/tests/testthat/test-fg_pip-local.R @@ -1,5 +1,6 @@ # skip_if(Sys.getenv('WBPIP_RUN_LOCAL_TESTS') != "TRUE") # Tests depend on PIPAPI_DATA_ROOT_FOLDER_LOCAL. Skip if not found. + skip_if(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL") == "") lkups <- create_versioned_lkups(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL")) lkup <- lkups$versions_paths[[lkups$latest_release]] @@ -126,3 +127,124 @@ test_that("fg_remove_duplicates test", { expect_equal(nrow(res), 2) expect_type(res$data_interpolation_id, "character") }) + + +# SPL and median -------------- +tmp <- fg_pip( + country = "ALL", + year = "ALL", + povline = 2.15, + popshare = NULL, + welfare_type = "all", + reporting_level = "all", + ppp = NULL, + lkup = lkup +) + +# dt <- pip(country = "ALL", +# lkup = lkup, +# povline = 2.15, +# fill_gaps = TRUE) +# setDT(dt) + + +censored <- lkup$censored$countries + + +## no unexpected NAs ------------ +test_that("NAs only in censored data", { + + ### Median ------------ + expect_equal( + tmp[is.na(median)][!censored, + on = c("country_code", + "reporting_year", + "reporting_level", + "welfare_type")] |> + nrow(), + expected = 0) + + ### SPR --------------- + expect_equal( + tmp[is.na(spr)][!censored, + on = c("country_code", + "reporting_year", + "reporting_level", + "welfare_type")] |> + nrow(), + expected = 0) + +}) + +## Duplicates ------------- +test_that("median does not have duplicates", { + + ### by reporting level---------------- + anyDuplicated(tmp[!is.na(median), + c("country_code", + "reporting_year", + "welfare_type", + # "reporting_level", + "median")]) |> + expect_equal(0) + + ### by welfare type ------------- + anyDuplicated(tmp[!is.na(median), + c("country_code", + "reporting_year", + # "welfare_type", + "reporting_level", + "median")]) |> + expect_equal(0) + +}) + +test_that("SPR does not have duplicates", { + + ### by reporting level---------------- + anyDuplicated(tmp[!is.na(spr), + c("country_code", + "reporting_year", + "welfare_type", + # "reporting_level", + "spr")]) |> + expect_equal(0) + + ### by welfare type ------------- + anyDuplicated(tmp[!is.na(spr), + c("country_code", + "reporting_year", + # "welfare_type", + "reporting_level", + "spr")]) |> + expect_equal(0) + +}) + + +test_that("SPL is the same by reporting level", { + + + no_na <- + tmp[!is.na(spl), + c("country_code", + "reporting_year", + "welfare_type", + "reporting_level", + "spl") + ] + + no_na[, .N, by = c("country_code", + "reporting_year", + "welfare_type", + "reporting_level", + "spl")][,N] |> + expect_equal( + no_na[, .N, by = c("country_code", + "reporting_year", + "welfare_type", + "reporting_level")][,N] + ) +}) + + diff --git a/tests/testthat/test-get_aux_table.R b/tests/testthat/test-get_aux_table.R index 0d967a3b..556e893e 100644 --- a/tests/testthat/test-get_aux_table.R +++ b/tests/testthat/test-get_aux_table.R @@ -22,3 +22,12 @@ test_that("get_aux_table() returns an error", { expect_error(pipapi::get_aux_table(data_folder_root, "../survey_data/ARG_1980_EPH_D2_INC_GROUP.fst"), "Error opening fst file for reading, please check access rights and file availability") }) + +# test_that("all objects are correctly passed and used", { +# root <- rprojroot::is_r_package +# tmp <- lintr::lint(filename = root$find_file("R/get_aux_table.R"), +# linters = lintr::object_usage_linter()) +# +# expect_equal(length(tmp), 0) +# }) + diff --git a/tests/testthat/test-get_param_values-local.R b/tests/testthat/test-get_param_values-local.R index 90807072..198e0a76 100644 --- a/tests/testthat/test-get_param_values-local.R +++ b/tests/testthat/test-get_param_values-local.R @@ -1,17 +1,65 @@ # Tests depend on PIPAPI_DATA_ROOT_FOLDER_LOCAL. Skip if not found. skip_if(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL") == "") lkups <- create_versioned_lkups(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL")) +version <- lkups$latest_release -res <- get_param_values(lkup = lkups, version = "latest_release") +res <- get_param_values(lkup = lkups, version = version) -test_that("get_param_values() returns a data.table", { +test_that("get_param_values() returns expected format", { + res <- get_param_values(lkup = lkups, + endpoint = "all", + version = version) + # Expected class is being returned expect_equal(class(res), c("data.table", "data.frame")) -}) + # Expected column names are being returned + expect_equal(colnames(res), c("param_names", + "param_values", + "param_boundaries", + "param_types")) + expect_equal(unname(unlist(lapply(res, class))), rep("character", 4)) -test_that("get_param_values() returns expected columns", { + res <- get_param_values(lkup = lkups, + endpoint = "pip", + version = version) + # Expected class is being returned + expect_equal(class(res), c("data.table", "data.frame")) + # Expected column names are being returned expect_equal(colnames(res), c("param_names", "param_values", "param_boundaries", "param_types")) expect_equal(unname(unlist(lapply(res, class))), rep("character", 4)) }) + +test_that("get_param_values() works as expected for specific endpoint", { + res <- get_param_values(lkup = lkups, + endpoint = "pip", + version = version) + expect_true(nrow(res) > 0) + expect_equal(sort(unique(res$param_names)), c("additional_ind", + "country", + "fill_gaps", + "group_by", + "popshare", + "povline", + "ppp", + "reporting_level", + "version", + "welfare_type", + "year")) + + res <- get_param_values(lkup = lkups, + endpoint = "pip-grp", + version = version) + expect_true(nrow(res) > 0) + expect_equal(sort(unique(res$param_names)), c("country", + "group_by", + "povline", + "reporting_level", + "version", + "welfare_type", + "year")) +}) + + + diff --git a/tests/testthat/test-get_param_values.R b/tests/testthat/test-get_param_values.R index e103a997..e1fffeae 100644 --- a/tests/testthat/test-get_param_values.R +++ b/tests/testthat/test-get_param_values.R @@ -8,3 +8,11 @@ # res <- get_param_values('parameter', lkup = lkups) # expect_equal(res, data.frame(parameter = lkups$query_controls$parameter$values[1:11])) # }) + +# test_that("all objects are correctly passed and used", { +# root <- rprojroot::is_r_package +# tmp <- lintr::lint(filename = root$find_file("R/get_param_values.R"), +# linters = lintr::object_usage_linter()) +# +# expect_equal(length(tmp), 0) +# }) diff --git a/tests/testthat/test-get_pip_version.R b/tests/testthat/test-get_pip_version.R index 6448d4c1..868f059d 100644 --- a/tests/testthat/test-get_pip_version.R +++ b/tests/testthat/test-get_pip_version.R @@ -13,3 +13,11 @@ test_that("get_pip_version() is working", { "server_os", "server_time")) }) + +# test_that("all objects are correctly passed and used", { +# root <- rprojroot::is_r_package +# tmp <- lintr::lint(filename = root$find_file("R/get_pip_version.R"), +# linters = lintr::object_usage_linter()) +# +# expect_equal(length(tmp), 0) +# }) diff --git a/tests/testthat/test-lint.R b/tests/testthat/test-lint.R new file mode 100644 index 00000000..e9882c00 --- /dev/null +++ b/tests/testthat/test-lint.R @@ -0,0 +1,84 @@ +# +# test_that("there is unused objects", { +# lints <- lintr::lint_package(linters = lintr::object_usage_linter()) +# # NOTE: There are still some flagged lints, but they should be harmless +# # Some are due to data.table use of NSE +# # Others are due to code like the chunk below: +# # if (reporting_level %in% c("national", "all")) { +# # out <- add_agg_stats(out, +# # return_cols = lkup$return_cols$ag_average_poverty_stats) +# # } +# expect_equal(length(lints), 9, info = as.character(lints)) +# }) +# +# test_that("there is no cyclomatic complexity issue", { +# lints <- lintr::lint_package(linters = lintr::cyclocomp_linter(complexity_limit = 20L)) +# +# expect_equal(length(lints), 0, info = as.character(lints)) +# }) +# +# test_that("there is no unnecessary nested if conditions", { +# skip("TO BE REVIEWED") +# lints <- lintr::lint_package(linters = lintr::unnecessary_nested_if_linter()) +# +# expect_equal(length(lints), 0, info = as.character(lints)) +# }) +# +# test_that("all code is reachable", { +# lints <- lintr::lint_package(linters = lintr::unreachable_code_linter()) +# +# expect_equal(length(lints), 0, info = as.character(lints)) +# }) +# +# test_that("there is no duplicate arguments in function calls", { +# lints <- lintr::lint_package(linters = lintr::duplicate_argument_linter()) +# +# expect_equal(length(lints), 0, info = as.character(lints)) +# }) +# +# test_that("default arguments come last", { +# skip("TO BE REVIEWED") +# lints <- lintr::lint_package(linters = lintr::function_argument_linter()) +# +# expect_equal(length(lints), 0, info = as.character(lints)) +# }) +# +# +# test_that("c() is applied before expensive functions", { +# lints <- lintr::lint_package(linters = lintr::inner_combine_linter()) +# +# expect_equal(length(lints), 0, info = as.character(lints)) +# }) +# +# +# test_that("there is no function with missing argument", { +# lints <- lintr::lint_package(linters = lintr::missing_argument_linter()) +# +# expect_equal(length(lints), 0, info = as.character(lints)) +# }) +# +# test_that("there is no namespace issue", { +# lints <- lintr::lint_package( +# linters = lintr::namespace_linter( +# check_exports = FALSE, +# check_nonexports = FALSE) +# ) +# +# expect_equal(length(lints), 0, info = as.character(lints)) +# }) +# +# test_that("lambda functions are not used unnecessarily", { +# lints <- lintr::lint_package(linters = lintr::unnecessary_lambda_linter()) +# +# expect_equal(length(lints), 0, info = as.character(lints)) +# }) +# +# test_that("no unnecessary concatenation is being used", { +# skip("TO BE REVIEWED") +# lints <- lintr::lint_package( +# linters = lintr::unnecessary_concatenation_linter( +# allow_single_expression = TRUE) +# ) +# +# expect_equal(length(lints), 0, info = as.character(lints)) +# }) diff --git a/tests/testthat/test-pip-local.R b/tests/testthat/test-pip-local.R index f92f079e..d84afd3f 100644 --- a/tests/testthat/test-pip-local.R +++ b/tests/testthat/test-pip-local.R @@ -726,5 +726,102 @@ test_that("error when more than one dataset is passed", { ) }) +# + +# SPL and median -------------- + +tmp <- pip(country = "ALL", + lkup = lkups, + povline = 2.15, + fill_gaps = TRUE) +setDT(tmp) + +## NAs ----------- + +test_that("no NAs", { + ### median ------------ + tmp[is.na(median)] |> + nrow() |> + expect_equal(0) + ### spl ------------ + tmp[is.na(spl)] |> + nrow() |> + expect_equal(0) + ### spr ------------ + tmp[is.na(spr)] |> + nrow() |> + expect_equal(0) +}) + +## Duplicates ------------- +test_that("median does not have duplicates", { + + ### by reporting level---------------- + anyDuplicated(tmp[, + c("country_code", + "reporting_year", + "welfare_type", + # "reporting_level", + "median")]) |> + expect_equal(0) + + ### by welfare type ------------- + anyDuplicated(tmp[, + c("country_code", + "reporting_year", + # "welfare_type", + "reporting_level", + "median")]) |> + expect_equal(0) + +}) + +test_that("SPR does not have duplicates", { + + ### by reporting level---------------- + anyDuplicated(tmp[, + c("country_code", + "reporting_year", + "welfare_type", + # "reporting_level", + "spr")]) |> + expect_equal(0) + + ### by welfare type ------------- + anyDuplicated(tmp[, + c("country_code", + "reporting_year", + # "welfare_type", + "reporting_level", + "spr")]) |> + expect_equal(0) + +}) + + +test_that("SPL is the same by reporting level", { + + + no_na <- + tmp[, + c("country_code", + "reporting_year", + "welfare_type", + "reporting_level", + "spl") + ] + + no_na[, .N, by = c("country_code", + "reporting_year", + "welfare_type", + "reporting_level", + "spl")][,N] |> + expect_equal( + no_na[, .N, by = c("country_code", + "reporting_year", + "welfare_type", + "reporting_level")][,N] + ) +}) diff --git a/tests/testthat/test-pip.R b/tests/testthat/test-pip.R index 8713102a..df475f0a 100644 --- a/tests/testthat/test-pip.R +++ b/tests/testthat/test-pip.R @@ -602,3 +602,4 @@ test_that("error when more than one dataset is passed", { Try passing a single one by subsetting it lkup <- lkups$versions_paths$dataset_name_PROD", fixed = TRUE) }) + diff --git a/tests/testthat/test-pip_grp.R b/tests/testthat/test-pip_grp.R index 7da3f356..2106e68a 100644 --- a/tests/testthat/test-pip_grp.R +++ b/tests/testthat/test-pip_grp.R @@ -12,7 +12,7 @@ censored <- # Check pip_grp against current implementation # TO BE REMOVED ONCE pip() group_by OPTION is FULLY DEPRECATED test_that("output from pip_grp is the same as output from pip", { - # skip("TEMPORARY SKIP") + skip("Skip because `pip()` should not be used with `group_by` argument anymore.") out_pip <- pip( country = "all", year = 2010, @@ -247,3 +247,4 @@ test_that("region selection is working for multiple regions and country from oth expect_equal(nrow(out), length(expected_region_values)) expect_equal(sort(out$region_code), sort(expected_region_values)) }) + diff --git a/tests/testthat/test-plumber-future.R b/tests/testthat/test-plumber-future.R index 8ec89946..ea0e8151 100644 --- a/tests/testthat/test-plumber-future.R +++ b/tests/testthat/test-plumber-future.R @@ -18,7 +18,7 @@ api1 <- future.callr::callr(function() { library(pipapi) lkups <<- pipapi::create_versioned_lkups(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL")) pipapi::start_api(port = 8000) -}, workers = 2, # n workers for API +}, workers = 2#, # n workers for API #globals = list(lkups = lkups), #packages = c("pipapi") ) diff --git a/tests/testthat/test-ret_list.R b/tests/testthat/test-ret_list.R deleted file mode 100644 index 8849056e..00000000 --- a/tests/testthat/test-ret_list.R +++ /dev/null @@ -1,3 +0,0 @@ -test_that("multiplication works", { - expect_equal(2 * 2, 4) -}) diff --git a/tests/testthat/test-ui_functions.R b/tests/testthat/test-ui_country_profile.R similarity index 70% rename from tests/testthat/test-ui_functions.R rename to tests/testthat/test-ui_country_profile.R index f2b55d95..405d41e9 100644 --- a/tests/testthat/test-ui_functions.R +++ b/tests/testthat/test-ui_country_profile.R @@ -5,11 +5,6 @@ skip_if(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL") == "") lkups <- create_versioned_lkups(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL")) lkups <- lkups$versions_paths[[lkups$latest_release]] -# lkup_path <- test_path("testdata", "lkup.rds") -# lkups <- readRDS(lkup_path) - - - set.seed(42) lkups$pl_lkup <- lkups$pl_lkup[sample(nrow(lkups$pl_lkup), 10)] lkups2 <- lkups @@ -20,113 +15,6 @@ lkups2$ref_lkup <- lkups2$ref_lkup[country_code %in% c('AGO', 'ZWE')] dt_lac <- readRDS(test_path("testdata", "pip_lac_resp.rds")) dt_sas <- readRDS(test_path("testdata", "pip_sas_resp.rds")) -test_that("ui_hp_stacked() works as expected", { - res <- ui_hp_stacked(povline = 1.9, lkup = lkups2) - countries_selected <- lkups2$svy_lkup[, unique(country_code)] - tmp <- lkups2$aux_files$country_list[country_code %in% countries_selected] - - var_code <- c("country_code", "region_code", "world_code") - - regions_of_countries <- - tmp[, ..var_code] |> - melt(id.vars = "country_code") |> - {\(.) .[, value] }() |> - unique() - - - expect_identical( - names(res), - c( - "region_code", "reporting_year", - "poverty_line", "pop_in_poverty" - ) - ) - expect_identical(unique(res$region_code) |> - sort(), - regions_of_countries |> - sort()) - expect_true(all(res$pop_in_poverty == floor(res$pop_in_poverty))) # No decimals for population numbers -}) - -test_that("ui_hp_countries() works as expected", { - res <- ui_hp_countries(country = c("AGO", "CIV"), povline = 1.9, lkup = lkups) - expect_identical( - names(res), - c( - "region_code", "country_code", - "reporting_year", "poverty_line", - "reporting_pop", "pop_in_poverty" - ) - ) - expect_true(all(res$pop_in_poverty < 50)) - check <- lkups$svy_lkup[country_code %in% c("AGO", "CIV")]$reporting_year - expect_equal(res$reporting_year, check) -}) - -test_that("ui_pc_charts() works as expected", { - skip("TEMPORARY SKIP") - - # Regular query (fill_gaps = FALSE) - res <- ui_pc_charts(country = "AGO", povline = 1.9, lkup = lkups) - expect_equal(nrow(res), nrow(lkups$svy_lkup[country_code == "AGO"])) - expect_equal(length(names(res)), 35) - - skip("TEMPORARY SKIP") - # Regular query (fill_gaps = TRUE) - res <- ui_pc_charts(country = "AGO", povline = 1.9, fill_gaps = TRUE, lkup = lkups) - expect_equal(nrow(res), length(unique(lkups$ref_lkup$reporting_year))) - - # Group by - res <- ui_pc_charts(country = "AGO", group_by = "wb", povline = 1.9, lkup = lkups) - res2 <- pip(country = "AGO", group_by = "wb", povline = 1.9, lkup = lkups) - res2$reporting_pop <- res2$reporting_pop / 1e6 - res2$pop_in_poverty <- res2$pop_in_poverty / 1e6 - expect_equal(res, res2) -}) - -test_that("ui_pc_regional() works as expected", { - skip("This test fails but this situation should never arise in practice. - The lkup table is incomplete.") - res <- ui_pc_regional(povline = 1.9, lkup = lkups2) - - countries_selected <- lkups2$svy_lkup[, unique(country_code)] - tmp <- lkups2$aux_files$country_list[country_code %in% countries_selected] - - var_code <- grep("_code$", names(tmp), value = TRUE) - - regions_of_countries <- - tmp[, ..var_code] |> - melt(id.vars = "country_code") |> - {\(.) .[, value] }() |> - unique() - - expect_identical( - names(res) |> sort(), - c( - "region_name", - "region_code", - "reporting_year", - "reporting_pop", - "poverty_line", - "headcount", - "poverty_gap", - "poverty_severity", - "watts", - "mean", - "pop_in_poverty" - ) |> sort() - ) - - # DISABLE TEMPORARILY: NEEDS TO BE UPDATED - # expect_identical(unique(res$region_code) |> - # sort(), - # regions_of_countries |> - # sort()) - - - # expect_identical(unique(res$region_code), c("SSA", "WLD")) -}) - test_that("ui_cp_poverty_charts() works as expected", { dl <- ui_cp_poverty_charts( country = "AGO", @@ -169,7 +57,7 @@ test_that("ui_cp_poverty_charts() works as expected", { ) expect_equal(nrow(dl$pov_trend), nrow(lkups$dist_stats[country_code == "CHN" & - reporting_level == "national"])) + reporting_level == "national"])) expect_equal(unique(dl$pov_trend$reporting_level), "national") # Test that ui_cp_poverty_charts() works correctly for @@ -270,7 +158,7 @@ test_that("ui_cp_ki_headcount() works as expected", { df <- ui_cp_ki_headcount(country = "ARG", povline = 1.9, lkup = lkups) expect_false(is.na(df$headcount)) expect_equal(df$reporting_year, - max(lkups$svy_lkup[country_code == "ARG"]$reporting_year)) + max(lkups$svy_lkup[country_code == "ARG"]$reporting_year)) df <- ui_cp_ki_headcount(country = "SUR", povline = 1.9, lkup = lkups) expect_false(is.na(df$headcount)) @@ -283,14 +171,14 @@ test_that("ui_cp_ki_headcount() works as expected", { expect_false(is.na(df$headcount)) expect_equal(df$reporting_year, max(lkups$dist_stats[country_code == "CHN" & - reporting_level == "national"]$reporting_year)) + reporting_level == "national"]$reporting_year)) # Test that ui_cp_ki_headcount() works correctly for countries # w/ multiple reporting levels (only national rows are returned) df <- ui_cp_ki_headcount(country = "URY", povline = 1.9, lkup = lkups) expect_false(is.na(df$headcount)) expect_equal(df$reporting_year, max(lkups$dist_stats[country_code == "URY" & - reporting_level == "national"]$reporting_year)) + reporting_level == "national"]$reporting_year)) }) @@ -348,63 +236,6 @@ test_that("ui_cp_charts() only returns first country if multiple countries are p expect_equal(names(dl1), "AGO") }) -test_that("ui_svy_meta() works as expected", { - expected_names <- - c( - "country_code", - "country_name", - "reporting_year" , - "survey_year", - "surveyid_year", - "survey_title", - "survey_conductor", - "survey_coverage", - "welfare_type", - "distribution_type", - "metadata" - ) - expected_metadata <- c( - #"surveyid_year", - "survey_acronym", - "year_start", - "year_end", - "authoring_entity_name", - "abstract", - "collection_dates_cycle", - "collection_dates_start", - "collection_dates_end", - "sampling_procedure", - "collection_mode", - "coll_situation", - "weight", - "cleaning_operations" - ) - - res <- ui_svy_meta(country = "AGO", lkup = lkups) - expect_equal(unique(res$country_code), "AGO") - expect_equal(names(res), - expected_names) - - expect_equal( - names(res$metadata[[1]]), - expected_metadata - ) - - res <- ui_svy_meta(country = "all", lkup = lkups) - - expect_true(all(unique(res$country_code) %in% - lkups$query_controls$country$values)) - - expect_equal(names(res), - expected_names) - - expect_equal( - names(res$metadata[[1]]), - expected_metadata - ) - -}) - test_that("cp_correct_reporting_level() is working as expected for countries with 3 reporting levels", { # China returns national, urban, and rural data region <- @@ -526,3 +357,4 @@ test_that("ui_cp_key_indicators() optimized version returns same results as prev expect_equal(dl1, dl2) }) + diff --git a/tests/testthat/test-ui_home_page.R b/tests/testthat/test-ui_home_page.R new file mode 100644 index 00000000..472eb351 --- /dev/null +++ b/tests/testthat/test-ui_home_page.R @@ -0,0 +1,56 @@ +# Tests depend on PIPAPI_DATA_ROOT_FOLDER_LOCAL. Skip if not found. +skip_if(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL") == "") + +# constants +lkups <- create_versioned_lkups(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL")) +lkups <- lkups$versions_paths[[lkups$latest_release]] + +set.seed(42) +lkups$pl_lkup <- lkups$pl_lkup[sample(nrow(lkups$pl_lkup), 10)] +lkups2 <- lkups +lkups2$svy_lkup <- lkups2$svy_lkup[country_code %in% c('AGO', 'ZWE')] +lkups2$ref_lkup <- lkups2$ref_lkup[country_code %in% c('AGO', 'ZWE')] + +test_that("ui_hp_stacked() works as expected", { + res <- ui_hp_stacked(povline = 1.9, lkup = lkups2) + countries_selected <- lkups2$svy_lkup[, unique(country_code)] + tmp <- lkups2$aux_files$country_list[country_code %in% countries_selected] + + var_code <- c("country_code", "region_code", "world_code") + + regions_of_countries <- + tmp[, ..var_code] |> + melt(id.vars = "country_code") |> + {\(.) .[, value] }() |> + unique() + + + expect_identical( + names(res), + c( + "region_code", "reporting_year", + "poverty_line", "pop_in_poverty" + ) + ) + expect_identical(unique(res$region_code) |> + sort(), + regions_of_countries |> + sort()) + expect_true(all(res$pop_in_poverty == floor(res$pop_in_poverty))) # No decimals for population numbers +}) + +test_that("ui_hp_countries() works as expected", { + res <- ui_hp_countries(country = c("AGO", "CIV"), povline = 1.9, lkup = lkups) + expect_identical( + names(res), + c( + "region_code", "country_code", + "reporting_year", "poverty_line", + "reporting_pop", "pop_in_poverty" + ) + ) + expect_true(all(res$pop_in_poverty < 50)) + check <- lkups$svy_lkup[country_code %in% c("AGO", "CIV")]$reporting_year + expect_equal(res$reporting_year, check) +}) + diff --git a/tests/testthat/test-ui_miscellaneous.R b/tests/testthat/test-ui_miscellaneous.R new file mode 100644 index 00000000..9b489f92 --- /dev/null +++ b/tests/testthat/test-ui_miscellaneous.R @@ -0,0 +1,114 @@ +# Tests depend on PIPAPI_DATA_ROOT_FOLDER_LOCAL. Skip if not found. +skip_if(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL") == "") + +# constants +lkups <- create_versioned_lkups(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL")) +lkups <- lkups$versions_paths[[lkups$latest_release]] + +set.seed(42) +lkups$pl_lkup <- lkups$pl_lkup[sample(nrow(lkups$pl_lkup), 10)] +lkups2 <- lkups +lkups2$svy_lkup <- lkups2$svy_lkup[country_code %in% c('AGO', 'ZWE')] +lkups2$ref_lkup <- lkups2$ref_lkup[country_code %in% c('AGO', 'ZWE')] + +test_that("ui_hp_stacked() works as expected", { + res <- ui_hp_stacked(povline = 1.9, lkup = lkups2) + countries_selected <- lkups2$svy_lkup[, unique(country_code)] + tmp <- lkups2$aux_files$country_list[country_code %in% countries_selected] + + var_code <- c("country_code", "region_code", "world_code") + + regions_of_countries <- + tmp[, ..var_code] |> + melt(id.vars = "country_code") |> + {\(.) .[, value] }() |> + unique() + + + expect_identical( + names(res), + c( + "region_code", "reporting_year", + "poverty_line", "pop_in_poverty" + ) + ) + expect_identical(unique(res$region_code) |> + sort(), + regions_of_countries |> + sort()) + expect_true(all(res$pop_in_poverty == floor(res$pop_in_poverty))) # No decimals for population numbers +}) + +test_that("ui_hp_countries() works as expected", { + res <- ui_hp_countries(country = c("AGO", "CIV"), povline = 1.9, lkup = lkups) + expect_identical( + names(res), + c( + "region_code", "country_code", + "reporting_year", "poverty_line", + "reporting_pop", "pop_in_poverty" + ) + ) + expect_true(all(res$pop_in_poverty < 50)) + check <- lkups$svy_lkup[country_code %in% c("AGO", "CIV")]$reporting_year + expect_equal(res$reporting_year, check) +}) + +test_that("ui_svy_meta() works as expected", { + expected_names <- + c( + "country_code", + "country_name", + "reporting_year" , + "survey_year", + "surveyid_year", + "survey_title", + "survey_conductor", + "survey_coverage", + "welfare_type", + "distribution_type", + "metadata" + ) + expected_metadata <- c( + #"surveyid_year", + "survey_acronym", + "year_start", + "year_end", + "authoring_entity_name", + "abstract", + "collection_dates_cycle", + "collection_dates_start", + "collection_dates_end", + "sampling_procedure", + "collection_mode", + "coll_situation", + "weight", + "cleaning_operations" + ) + + res <- ui_svy_meta(country = "AGO", lkup = lkups) + expect_equal(unique(res$country_code), "AGO") + expect_equal(names(res), + expected_names) + + expect_equal( + names(res$metadata[[1]]), + expected_metadata + ) + + res <- ui_svy_meta(country = "all", lkup = lkups) + + expect_true(all(unique(res$country_code) %in% + lkups$query_controls$country$values)) + + expect_equal(names(res), + expected_names) + + expect_equal( + names(res$metadata[[1]]), + expected_metadata + ) + +}) + + diff --git a/tests/testthat/test-ui_poverty_indicators.R b/tests/testthat/test-ui_poverty_indicators.R new file mode 100644 index 00000000..a6eff13f --- /dev/null +++ b/tests/testthat/test-ui_poverty_indicators.R @@ -0,0 +1,81 @@ +# Tests depend on PIPAPI_DATA_ROOT_FOLDER_LOCAL. Skip if not found. +skip_if(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL") == "") + +# constants +lkups <- create_versioned_lkups(Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL")) +lkups <- lkups$versions_paths[[lkups$latest_release]] + +test_that("ui_pc_charts() works as expected", { + + # Regular query (fill_gaps = FALSE) + res <- ui_pc_charts(country = "AGO", + povline = 1.9, + lkup = lkups) + expect_equal(class(res), c("data.table", "data.frame")) + expect_equal(names(res), lkups$return_cols$ui_pc_charts$cols) + expect_equal(nrow(res), nrow(lkups$svy_lkup[country_code == "AGO"])) + + skip("TEMPORARY SKIP") + # Regular query (fill_gaps = TRUE) + country <- "AGO" + reporting_years <- length(unique(lkups$ref_lkup$reporting_year)) + expected_years <- reporting_years - nrow(lkups$censored$countries[country_code == country]) + res <- ui_pc_charts(country = country, + povline = 1.9, + fill_gaps = TRUE, + lkup = lkups) + expect_equal(class(res), c("data.table", "data.frame")) + expect_equal(names(res), lkups$return_cols$ui_pc_charts$cols) + expect_equal(nrow(res), expected_years) + + # Group by + res <- ui_pc_charts(country = "AGO", group_by = "wb", povline = 1.9, lkup = lkups) + res2 <- pip(country = "AGO", group_by = "wb", povline = 1.9, lkup = lkups) + res2$reporting_pop <- res2$reporting_pop / 1e6 + res2$pop_in_poverty <- res2$pop_in_poverty / 1e6 + expect_equal(res, res2) +}) + +test_that("ui_pc_regional() works as expected", { + skip("This test fails but this situation should never arise in practice. + The lkup table is incomplete.") + res <- ui_pc_regional(povline = 1.9, lkup = lkups2) + + countries_selected <- lkups2$svy_lkup[, unique(country_code)] + tmp <- lkups2$aux_files$country_list[country_code %in% countries_selected] + + var_code <- grep("_code$", names(tmp), value = TRUE) + + regions_of_countries <- + tmp[, ..var_code] |> + melt(id.vars = "country_code") |> + {\(.) .[, value] }() |> + unique() + + expect_identical( + names(res) |> sort(), + c( + "region_name", + "region_code", + "reporting_year", + "reporting_pop", + "poverty_line", + "headcount", + "poverty_gap", + "poverty_severity", + "watts", + "mean", + "pop_in_poverty" + ) |> sort() + ) + + # DISABLE TEMPORARILY: NEEDS TO BE UPDATED + # expect_identical(unique(res$region_code) |> + # sort(), + # regions_of_countries |> + # sort()) + + + # expect_identical(unique(res$region_code), c("SSA", "WLD")) +}) + diff --git a/tests/testthat/test-utils-plumber.R b/tests/testthat/test-utils-plumber.R index a6a48784..15e718a9 100644 --- a/tests/testthat/test-utils-plumber.R +++ b/tests/testthat/test-utils-plumber.R @@ -445,3 +445,4 @@ test_that("csv serialization returns empty string for missing values", { serialized_response, fixed = TRUE)) }) +