Skip to content

Commit

Permalink
update for Dec 2023
Browse files Browse the repository at this point in the history
update for Dec 2023
  • Loading branch information
randrescastaneda authored Dec 6, 2023
2 parents 1ce331a + 9887ac2 commit 468533a
Show file tree
Hide file tree
Showing 44 changed files with 1,747 additions and 1,020 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/lint-changed-files.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
pull_request:
branches: [main, master]

name: lint-changed-files

jobs:
lint-changed-files:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v3

- uses: r-lib/actions/setup-r@v2

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: |
any::gh
any::lintr
any::purrr
needs: check

- name: Add lintr options
run: |
cat('\noptions(lintr.linter_file = ".lintr")\n', file = "~/.Rprofile", append = TRUE)
shell: Rscript {0}

- name: Install package
run: R CMD INSTALL .

- name: Extract and lint files changed by this PR
run: |
files <- gh::gh("GET https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/files")
changed_files <- purrr::map_chr(files, "filename")
all_files <- list.files(recursive = TRUE)
exclusions_list <- as.list(setdiff(all_files, changed_files))
lintr::lint_package(exclusions = exclusions_list)
shell: Rscript {0}
env:
LINTR_ERROR_ON_LINT: true
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: pipapi
Title: API for the Poverty and Inequality Platform
Version: 1.3.3
Version: 1.3.3.9000
Authors@R:
c(person(given = "Tony",
family = "Fujs",
Expand Down Expand Up @@ -34,7 +34,8 @@ Suggests:
httr,
jsonlite,
future.callr,
future.apply
future.apply,
lintr
Language: en-US
Imports:
data.table,
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# pipapi (development version)
## New features
- Add new SPR and SPL indicators

## Enhancements
- Address some linting issues
- Increase test coverage

# pipapi 1.3.3
- Fix bug with ag_average_poverty_stats
- Better control of returned columns
Expand Down
11 changes: 4 additions & 7 deletions R/add_agg_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,10 @@ ag_average_poverty_stats <- function(df, return_cols) {
## Only numeric variables will be aggregated or averaged
## Year variables must not be modified
years_vars <- grep("year", names(df), value = TRUE)
years_vars <- years_vars[!vapply(df[, ..years_vars],
years_vars <- years_vars[!vapply(df[, .SD, .SDcols = years_vars],
is.logical,
FUN.VALUE = logical(1))]

df[, (years_vars) :=
lapply(.SD, as.character),
.SDcols = years_vars]
Expand Down Expand Up @@ -96,10 +97,6 @@ ag_average_poverty_stats <- function(df, return_cols) {

# STEP 3: Calculations ----------
## weighted average ------
totpop <- sum(df$reporting_pop)

wgt <- df$reporting_pop/totpop

wgt_df <- df |>
# this grouping is not necessary, but ensures data.frame as output
collapse::fgroup_by(c("country_code", "reporting_year", "welfare_type")) |>
Expand All @@ -117,7 +114,7 @@ ag_average_poverty_stats <- function(df, return_cols) {

# STEP 4: Format results ----
## Bind resulting tables ----
out <- cbind(df[1, ..nonum_names], wgt_df, sum_df)
out <- cbind(df[1, .SD, .SDcols = nonum_names], wgt_df, sum_df)

## convert years back to numeric ----
out[, (years_vars) :=
Expand All @@ -131,7 +128,7 @@ ag_average_poverty_stats <- function(df, return_cols) {
out[, (national_cols) := "national"]

## set order of obs anc col -------
out <- out[, ..orig_names]
out <- out[, .SD, .SDcols = orig_names]
data.table::setcolorder(out, orig_names)
data.table::setorderv(out, c("country_code", "reporting_year","welfare_type"))

Expand Down
23 changes: 13 additions & 10 deletions R/create_lkups.R
Original file line number Diff line number Diff line change
Expand Up @@ -264,10 +264,12 @@ create_lkups <- function(data_dir, versions) {
'reporting_pce',
'is_interpolated',
'distribution_type',
'estimation_type'#,
# 'spl',
# 'spr'
'estimation_type',
'spl',
'spr'
),

# This is not used anywhere anymore.
dist_stats = c(
"country_code",
"reporting_year",
Expand All @@ -289,16 +291,16 @@ create_lkups <- function(data_dir, versions) {
"poverty_severity",
"watts",
"mean",
"pop_in_poverty"#,
#"spr"
"pop_in_poverty",
"spr"
),
weighted_average_cols = c(
"headcount",
"poverty_gap",
"poverty_severity",
"watts",
"mean"#,
#"spr"
"mean",
"spr"
)
),
ui_pc_charts = list(
Expand Down Expand Up @@ -337,7 +339,8 @@ create_lkups <- function(data_dir, versions) {
'poverty_gap',
'poverty_severity',
'watts',
'pop_in_poverty'#, 'spr'
'pop_in_poverty',
'spr'
),
inequality_indicators = c(
'median',
Expand All @@ -363,8 +366,8 @@ create_lkups <- function(data_dir, versions) {
"headcount",
"poverty_gap",
"poverty_severity",
"watts"#,
#"spr"
"watts",
"spr"
),
zero_vars = c(
"mean",
Expand Down
27 changes: 4 additions & 23 deletions R/fg_pip.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,29 +119,10 @@ fg_pip <- function(country,
out[,
poverty_line := round(poverty_line, digits = 3) ]


# add SPL ---------

data_dir <- lkup$data_root
spl <-
get_aux_table(data_dir = data_dir,
table = "spl")

out <- merge.data.table(
x = out,
y = spl,
by = c(
"country_code",
"reporting_year",
"welfare_type",
"reporting_level"
),
all.x = TRUE
)

if (any(names(out) == "spl_headcount")) {
data.table::setnames(out, "spl_headcount", "spr")
}
# Add SPL and SPR ---------------
out <- add_spl(df = out,
fill_gaps = TRUE,
data_dir = lkup$data_root)

return(out)
}
Expand Down
3 changes: 2 additions & 1 deletion R/get_aux_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ get_aux_table <- function(data_dir, table, long_format = FALSE) {
# Strip all "non-word" characters from user input
sanitized_table <- gsub("\\W", "", table)


out <- fst::read_fst(sprintf(
"%s/_aux/%s.fst",
data_dir,
Expand All @@ -29,7 +30,7 @@ get_aux_table <- function(data_dir, table, long_format = FALSE) {
out <- data.table::melt(out,
id.vars = c('country_code', 'data_level'),
variable.name = "year")
data.table::setorder(out, country_code, year, data_level)
data.table::setorder(out, "country_code", "year", "data_level")
}

return(out)
Expand Down
2 changes: 1 addition & 1 deletion R/get_param_values.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ get_param_values <- function(lkup,
"pip-grp",
"pip-info",
"valid-params")) {
endpoint <- endpoint[1] # Ensure it only passes one endpoint at a time
# endpoint <- endpoint[1] # Ensure it only passes one endpoint at a time
endpoint <- match.arg(endpoint)
# TO IMPROVE: Too much hard-coding here
endpoint_map <- c("all",
Expand Down
2 changes: 1 addition & 1 deletion R/get_pip_version.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' @export
get_pip_version <- function(pip_packages = c("pipapi",
"wbpip"),
data_versions = lkups$versions) {
data_versions) {

# PIP package versions
core_packages <- lapply(pip_packages, function(x){
Expand Down
39 changes: 27 additions & 12 deletions R/pip.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ pip <- function(country = "ALL",
additional_ind = FALSE) {


# set up -------------

welfare_type <- match.arg(welfare_type)
reporting_level <- match.arg(reporting_level)
group_by <- match.arg(group_by)
Expand All @@ -83,6 +85,7 @@ pip <- function(country = "ALL",
stop("You are probably passing more than one dataset as lkup argument.
Try passing a single one by subsetting it lkup <- lkups$versions_paths$dataset_name_PROD")


# **** TO BE REMOVED **** REMOVAL STARTS HERE
# Once `pip-grp` has been integrated in ingestion pipeline
# Forces fill_gaps to TRUE when using group_by option
Expand All @@ -92,6 +95,7 @@ pip <- function(country = "ALL",
}
# **** TO BE REMOVED **** REMOVAL ENDS HERE

# Countries vector ------------
lcv <- # List with countries vectors
create_countries_vctr(
country = country,
Expand All @@ -100,9 +104,9 @@ pip <- function(country = "ALL",
aux_files = lkup$aux_files
)


# mains estimates ---------------
if (fill_gaps) {
# Compute imputed stats
## lineup years-----------------
out <- fg_pip(
country = lcv$est_ctrs,
year = year,
Expand All @@ -114,7 +118,7 @@ pip <- function(country = "ALL",
lkup = lkup
)
} else {
# Compute survey year stats
## survey years ------------------
out <- rg_pip(
country = lcv$est_ctrs,
year = year,
Expand All @@ -127,15 +131,17 @@ pip <- function(country = "ALL",
)
}

# return empty dataframe if no metadata is found
# Eary return for empty table---------------
if (nrow(out) == 0) {
return(out)
}

# Handles aggregated distributions
# aggregate distributions ------------------
if (reporting_level %in% c("national", "all")) {
out <- add_agg_stats(out,
return_cols = lkup$return_cols$ag_average_poverty_stats)
out <- add_agg_stats(
df = out,
return_cols = lkup$return_cols$ag_average_poverty_stats
)
if (reporting_level == "national") {
out <- out[reporting_level == "national"]
}
Expand Down Expand Up @@ -175,7 +181,8 @@ pip <- function(country = "ALL",
}
# **** TO BE REMOVED **** REMOVAL ENDS HERE

# Add pre-computed distributional statistics

# pre-computed distributional stats ---------------
crr_names <- names(out) # current variables
names2keep <- lkup$return_cols$pip$cols # all variables

Expand All @@ -184,14 +191,22 @@ pip <- function(country = "ALL",
dist_stats = lkup[["dist_stats"]]
)

# Add aggregate medians ----------------
out <- add_agg_medians(
df = out,
fill_gaps = fill_gaps,
data_dir = lkup$data_root
)

# format ----------------
## Inequality indicators to NA for lineup years ----
if (fill_gaps) {
# Convert inequality indicators to NA

dist_vars <- names2keep[!(names2keep %in% crr_names)]
out[,
(dist_vars) := NA_real_]
}

# Handle survey coverage
## Handle survey coverage ------------
if (reporting_level != "all") {
keep <- out$reporting_level == reporting_level
out <- out[keep, ]
Expand Down Expand Up @@ -228,6 +243,6 @@ pip <- function(country = "ALL",
# Order rows by country code and reporting year
data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type)


# return -------------
return(out)
}
6 changes: 3 additions & 3 deletions R/pip_grp.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ pip_grp <- function(country = "ALL",
}

keep <- lkup$return_cols$pip_grp$cols
out <- out[, ..keep]
out <- out[, .SD, .SDcols = keep]

return(out)
}
Expand Down Expand Up @@ -154,7 +154,7 @@ pip_aggregate <- function(df, by = NULL, return_cols) {

# Handle simple aggregation

df <- df[, ..to_keep]
df <- df[, .SD, .SDcols = to_keep]

byvar <- c(by, "reporting_year", "poverty_line")

Expand Down Expand Up @@ -217,7 +217,7 @@ pip_aggregate_by <- function(df,

to_keep <- all_cols[all_cols != "pop_in_poverty"]

df <- df[, ..to_keep]
df <- df[, .SD, .SDcols = to_keep]

group_lkup <- group_lkup[, c("region_code",
"reporting_year",
Expand Down
Loading

0 comments on commit 468533a

Please sign in to comment.