update for Dec 2023

PIP-Technical-Team · Dec 6, 2023 · 468533a · 468533a
2 parents 1ce331a + 9887ac2
commit 468533a
Show file tree

Hide file tree

Showing 44 changed files with 1,747 additions and 1,020 deletions.
diff --git a/.github/workflows/lint-changed-files.yaml b/.github/workflows/lint-changed-files.yaml
@@ -0,0 +1,44 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  pull_request:
+    branches: [main, master]
+
+name: lint-changed-files
+
+jobs:
+  lint-changed-files:
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: r-lib/actions/setup-r@v2
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: |
+            any::gh
+            any::lintr
+            any::purrr
+          needs: check
+
+      - name: Add lintr options
+        run: |
+          cat('\noptions(lintr.linter_file = ".lintr")\n', file = "~/.Rprofile", append = TRUE)
+        shell: Rscript {0}
+
+      - name: Install package
+        run: R CMD INSTALL .
+
+      - name: Extract and lint files changed by this PR
+        run: |
+          files <- gh::gh("GET https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/files")
+          changed_files <- purrr::map_chr(files, "filename")
+          all_files <- list.files(recursive = TRUE)
+          exclusions_list <- as.list(setdiff(all_files, changed_files))
+          lintr::lint_package(exclusions = exclusions_list)
+        shell: Rscript {0}
+        env:
+          LINTR_ERROR_ON_LINT: true
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: pipapi
 Title: API for the Poverty and Inequality Platform
-Version: 1.3.3
+Version: 1.3.3.9000
 Authors@R: 
     c(person(given = "Tony",
             family = "Fujs",
@@ -34,7 +34,8 @@ Suggests:
     httr,
     jsonlite,
     future.callr,
-    future.apply
+    future.apply,
+    lintr
 Language: en-US
 Imports: 
     data.table,

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,11 @@
+# pipapi (development version)
+## New features
+- Add new SPR and SPL indicators
+
+## Enhancements
+- Address some linting issues
+- Increase test coverage
+
 # pipapi 1.3.3
 - Fix bug with ag_average_poverty_stats
 - Better control of returned columns

diff --git a/R/add_agg_stats.R b/R/add_agg_stats.R
@@ -56,9 +56,10 @@ ag_average_poverty_stats <- function(df, return_cols) {
   ## Only numeric variables will be aggregated or averaged
   ## Year variables must not be modified
   years_vars <- grep("year", names(df), value = TRUE)
-  years_vars <- years_vars[!vapply(df[, ..years_vars],
+  years_vars <- years_vars[!vapply(df[, .SD, .SDcols = years_vars],
                                    is.logical,
                                    FUN.VALUE = logical(1))]
+
   df[, (years_vars) :=
        lapply(.SD, as.character),
      .SDcols = years_vars]
@@ -96,10 +97,6 @@ ag_average_poverty_stats <- function(df, return_cols) {
 
   # STEP 3: Calculations ----------
   ## weighted average  ------
-  totpop <- sum(df$reporting_pop)
-
-  wgt <-  df$reporting_pop/totpop
-
   wgt_df <- df |>
     # this grouping is not necessary, but ensures data.frame as output
     collapse::fgroup_by(c("country_code", "reporting_year", "welfare_type")) |>
@@ -117,7 +114,7 @@ ag_average_poverty_stats <- function(df, return_cols) {
 
   # STEP 4: Format results ----
   ## Bind resulting tables ----
-  out <- cbind(df[1, ..nonum_names], wgt_df, sum_df)
+  out <- cbind(df[1, .SD, .SDcols = nonum_names], wgt_df, sum_df)
 
   ## convert years back to numeric ----
   out[, (years_vars) :=
@@ -131,7 +128,7 @@ ag_average_poverty_stats <- function(df, return_cols) {
   out[, (national_cols) := "national"]
 
   ## set order of obs anc col -------
-  out <- out[, ..orig_names]
+  out <- out[, .SD, .SDcols = orig_names]
   data.table::setcolorder(out, orig_names)
   data.table::setorderv(out, c("country_code", "reporting_year","welfare_type"))
 

diff --git a/R/create_lkups.R b/R/create_lkups.R
@@ -264,10 +264,12 @@ create_lkups <- function(data_dir, versions) {
         'reporting_pce',
         'is_interpolated',
         'distribution_type',
-        'estimation_type'#,
-        # 'spl',
-        # 'spr'
+        'estimation_type',
+        'spl',
+        'spr'
       ),
+
+      # This is not used anywhere anymore.
       dist_stats = c(
         "country_code",
         "reporting_year",
@@ -289,16 +291,16 @@ create_lkups <- function(data_dir, versions) {
         "poverty_severity",
         "watts",
         "mean",
-        "pop_in_poverty"#,
-        #"spr"
+        "pop_in_poverty",
+        "spr"
       ),
       weighted_average_cols = c(
         "headcount",
         "poverty_gap",
         "poverty_severity",
         "watts",
-        "mean"#,
-        #"spr"
+        "mean",
+        "spr"
       )
     ),
     ui_pc_charts = list(
@@ -337,7 +339,8 @@ create_lkups <- function(data_dir, versions) {
         'poverty_gap',
         'poverty_severity',
         'watts',
-        'pop_in_poverty'#, 'spr'
+        'pop_in_poverty',
+        'spr'
       ),
       inequality_indicators = c(
         'median',
@@ -363,8 +366,8 @@ create_lkups <- function(data_dir, versions) {
         "headcount",
         "poverty_gap",
         "poverty_severity",
-        "watts"#,
-        #"spr"
+        "watts",
+        "spr"
       ),
       zero_vars = c(
         "mean",

diff --git a/R/fg_pip.R b/R/fg_pip.R
@@ -119,29 +119,10 @@ fg_pip <- function(country,
   out[,
       poverty_line := round(poverty_line, digits = 3) ]
 
-
-  # add SPL ---------
-
-  data_dir <- lkup$data_root
-  spl <-
-    get_aux_table(data_dir = data_dir,
-                  table = "spl")
-
-  out <- merge.data.table(
-    x = out,
-    y = spl,
-    by = c(
-      "country_code",
-      "reporting_year",
-      "welfare_type",
-      "reporting_level"
-    ),
-    all.x = TRUE
-  )
-
-  if (any(names(out) == "spl_headcount")) {
-    data.table::setnames(out, "spl_headcount", "spr")
-  }
+  # Add SPL and SPR  ---------------
+  out <- add_spl(df        = out,
+                 fill_gaps = TRUE,
+                 data_dir  = lkup$data_root)
 
   return(out)
 }

diff --git a/R/get_aux_table.R b/R/get_aux_table.R
@@ -19,6 +19,7 @@ get_aux_table <- function(data_dir, table, long_format = FALSE) {
   # Strip all "non-word" characters from user input
   sanitized_table <- gsub("\\W", "", table)
 
+
   out <- fst::read_fst(sprintf(
     "%s/_aux/%s.fst",
     data_dir,
@@ -29,7 +30,7 @@ get_aux_table <- function(data_dir, table, long_format = FALSE) {
     out <- data.table::melt(out,
                             id.vars = c('country_code', 'data_level'),
                             variable.name = "year")
-    data.table::setorder(out, country_code, year, data_level)
+    data.table::setorder(out, "country_code", "year", "data_level")
   }
 
   return(out)

diff --git a/R/get_param_values.R b/R/get_param_values.R
@@ -12,7 +12,7 @@ get_param_values <- function(lkup,
                                           "pip-grp",
                                           "pip-info",
                                           "valid-params")) {
-  endpoint <- endpoint[1] # Ensure it only passes one endpoint at a time
+  # endpoint <- endpoint[1] # Ensure it only passes one endpoint at a time
   endpoint <- match.arg(endpoint)
   # TO IMPROVE: Too much hard-coding here
   endpoint_map <- c("all",

diff --git a/R/get_pip_version.R b/R/get_pip_version.R
@@ -6,7 +6,7 @@
 #' @export
 get_pip_version <- function(pip_packages = c("pipapi",
                                              "wbpip"),
-                            data_versions = lkups$versions) {
+                            data_versions) {
 
   # PIP package versions
   core_packages <- lapply(pip_packages, function(x){

diff --git a/R/pip.R b/R/pip.R
@@ -68,6 +68,8 @@ pip <- function(country         = "ALL",
                 additional_ind  = FALSE) {
 
 
+  # set up -------------
+
   welfare_type    <- match.arg(welfare_type)
   reporting_level <- match.arg(reporting_level)
   group_by        <- match.arg(group_by)
@@ -83,6 +85,7 @@ pip <- function(country         = "ALL",
     stop("You are probably passing more than one dataset as lkup argument.
   Try passing a single one by subsetting it lkup <- lkups$versions_paths$dataset_name_PROD")
 
+
   # **** TO BE REMOVED **** REMOVAL STARTS HERE
   # Once `pip-grp` has been integrated in ingestion pipeline
   # Forces fill_gaps to TRUE when using group_by option
@@ -92,6 +95,7 @@ pip <- function(country         = "ALL",
   }
   # **** TO BE REMOVED **** REMOVAL ENDS HERE
 
+  # Countries vector ------------
   lcv <- # List with countries vectors
     create_countries_vctr(
       country         =  country,
@@ -100,9 +104,9 @@ pip <- function(country         = "ALL",
       aux_files       =  lkup$aux_files
     )
 
-
+  # mains estimates ---------------
   if (fill_gaps) {
-    # Compute imputed stats
+    ## lineup years-----------------
     out <- fg_pip(
       country            = lcv$est_ctrs,
       year               = year,
@@ -114,7 +118,7 @@ pip <- function(country         = "ALL",
       lkup               = lkup
       )
   } else {
-    # Compute survey year stats
+    ## survey years ------------------
     out <- rg_pip(
       country         = lcv$est_ctrs,
       year            = year,
@@ -127,15 +131,17 @@ pip <- function(country         = "ALL",
     )
   }
 
-  # return empty dataframe if no metadata is found
+  # Eary return for empty table---------------
   if (nrow(out) == 0) {
     return(out)
   }
 
-  # Handles aggregated distributions
+  # aggregate distributions ------------------
   if (reporting_level %in% c("national", "all")) {
-    out <- add_agg_stats(out,
-                         return_cols = lkup$return_cols$ag_average_poverty_stats)
+    out <- add_agg_stats(
+      df = out,
+      return_cols = lkup$return_cols$ag_average_poverty_stats
+      )
     if (reporting_level == "national") {
       out <- out[reporting_level == "national"]
     }
@@ -175,7 +181,8 @@ pip <- function(country         = "ALL",
   }
   # **** TO BE REMOVED **** REMOVAL ENDS HERE
 
-  # Add pre-computed distributional statistics
+
+  # pre-computed distributional stats ---------------
   crr_names  <- names(out)    # current variables
   names2keep <- lkup$return_cols$pip$cols # all variables
 
@@ -184,14 +191,22 @@ pip <- function(country         = "ALL",
     dist_stats = lkup[["dist_stats"]]
   )
 
+  # Add aggregate medians ----------------
+  out <- add_agg_medians(
+    df        = out,
+    fill_gaps = fill_gaps,
+    data_dir  = lkup$data_root
+  )
+
+  # format ----------------
+  ## Inequality indicators to NA for lineup years ----
   if (fill_gaps) {
-    # Convert inequality indicators to NA
+
     dist_vars  <- names2keep[!(names2keep %in% crr_names)]
     out[,
         (dist_vars) := NA_real_]
   }
-
-  # Handle survey coverage
+  ## Handle survey coverage ------------
   if (reporting_level != "all") {
     keep <- out$reporting_level == reporting_level
     out <- out[keep, ]
@@ -228,6 +243,6 @@ pip <- function(country         = "ALL",
   # Order rows by country code and reporting year
   data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type)
 
-
+  # return -------------
   return(out)
 }
diff --git a/R/pip_grp.R b/R/pip_grp.R
@@ -97,7 +97,7 @@ pip_grp <- function(country         = "ALL",
   }
 
   keep <- lkup$return_cols$pip_grp$cols
-  out <- out[, ..keep]
+  out <- out[, .SD, .SDcols = keep]
 
   return(out)
 }
@@ -154,7 +154,7 @@ pip_aggregate <- function(df, by = NULL, return_cols) {
 
   # Handle simple aggregation
 
-  df <- df[, ..to_keep]
+  df <- df[, .SD, .SDcols = to_keep]
 
   byvar <- c(by, "reporting_year", "poverty_line")
 
@@ -217,7 +217,7 @@ pip_aggregate_by <- function(df,
 
   to_keep <- all_cols[all_cols != "pop_in_poverty"]
 
-  df <- df[, ..to_keep]
+  df <- df[, .SD, .SDcols = to_keep]
 
   group_lkup <- group_lkup[, c("region_code",
                                "reporting_year",