PIP-Technical-Team · Aeilert · Feb 11, 2022 · Feb 21, 2022 · Feb 21, 2022 · Feb 21, 2022
diff --git a/R/add_agg_stats.R b/R/add_agg_stats.R
@@ -13,7 +13,7 @@ add_agg_stats <- function(df, except = c("ARG", "URY")) {
     aggregated_list <- split(aggregated,
       interaction(
         aggregated$country_code,
-        aggregated$reporting_year
+        aggregated$year
       ),
       drop = TRUE
     )
@@ -54,7 +54,7 @@ ag_average_poverty_stats <- function(df) {
   out[, na_cols] <- NA_real_
 
   # Compute population weighted average
-  wgt_urban <- dfu$reporting_pop / sum(df$reporting_pop)
+  wgt_urban <- dfu$pop / sum(df$pop)
   wgt_rural <- 1 - wgt_urban
 
   # Weighted national mean
@@ -96,9 +96,9 @@ ag_average_poverty_stats <- function(df) {
   }
 
   # Update other variables
-  out$reporting_pop <- sum(df$reporting_pop)
+  out$pop <- sum(df$pop)
   national_cols <- c("reporting_level", "gdp_data_level",
-                     "pce_data_level", "cpi_data_level", "ppp_data_level")
+                     "hfce_data_level", "cpi_data_level", "ppp_data_level")
   out[, national_cols] <- "national"
 
   return(out)

diff --git a/R/create_lkups.R b/R/create_lkups.R
@@ -89,6 +89,7 @@ create_lkups <- function(data_dir, versions) {
   )
   # TEMP cleaning - START
   svy_lkup <- svy_lkup[svy_lkup$cache_id %in% paths_ids, ]
+  svy_lkup <- rename_cols(svy_lkup)
   # TEMP cleaning - END
   svy_lkup$path <- sprintf(
     "%s/survey_data/%s.fst",
@@ -100,6 +101,7 @@ create_lkups <- function(data_dir, versions) {
   )
   # TEMP cleaning - START
   ref_lkup <- ref_lkup[ref_lkup$cache_id %in% paths_ids, ]
+  ref_lkup <- rename_cols(ref_lkup)
   # TEMP cleaning - END
   ref_lkup$path <- sprintf(
     "%s/survey_data/%s.fst",
@@ -127,7 +129,7 @@ create_lkups <- function(data_dir, versions) {
     reporting_level = unique(tmp_metadata[["reporting_level"]])
     paths <- unique(tmp_metadata$path)
     ctry_years <- unique(tmp_metadata[, c(
-      "country_code", "reporting_year",
+      "country_code", "year",
       "reporting_level", "interpolation_id"
     )])
 
@@ -145,44 +147,39 @@ create_lkups <- function(data_dir, versions) {
   # Load dist_stats
   dist_stats <- fst::read_fst(sprintf("%s/estimations/dist_stats.fst", data_dir),
                               as.data.table = TRUE)
+  dist_stats <- rename_cols(dist_stats)
 
   # Load pop_region
   pop_region <- fst::read_fst(sprintf("%s/_aux/pop_region.fst", data_dir),
     as.data.table = TRUE)
+  pop_region <- rename_cols(pop_region)
 
   # Load country profiles lkups
   cp_lkups <- readRDS(sprintf("%s/_aux/country_profiles.RDS", data_dir))
+  cp_lkups$key_indicators <- lapply(cp_lkups$key_indicators, rename_cols)
+  cp_lkups$charts <- lapply(cp_lkups$charts, rename_cols)
 
   # Load poverty lines table
   pl_lkup <- fst::read_fst(sprintf("%s/_aux/poverty_lines.fst", data_dir),
     as.data.table = TRUE)
 
   # Load list with censor tables
   censored <- readRDS(sprintf("%s/_aux/censored.RDS", data_dir))
+  censored <- lapply(censored, rename_cols)
 
   # Create pip return columns
   pip_cols <-
-    c('region_code', 'country_code', 'reporting_year',
-      'reporting_level', 'survey_acronym', 'survey_coverage',
-      'survey_year', 'welfare_type', 'survey_comparability',
+    c('region_code', 'country_code', 'year',
+      'reporting_level','survey_acronym', 'survey_coverage',
+      'welfare_time', 'welfare_type', 'survey_comparability',
       'comparable_spell', 'poverty_line',
       'headcount', 'poverty_gap', 'poverty_severity', 'watts',
       'mean', 'median', 'mld', 'gini', 'polarization',
       'decile1', 'decile2', 'decile3', 'decile4', 'decile5',
       'decile6', 'decile7', 'decile8', 'decile9', 'decile10',
-       # 'survey_mean_lcu', 'survey_mean_ppp', # Do we need these?
-       # 'predicted_mean_ppp', # Do we need this?
-      'cpi', #'cpi_data_level',
-      'ppp', #'ppp_data_level',
-      'reporting_pop', #'pop_data_level',
-      'reporting_gdp', #'gdp_data_level',
-      'reporting_pce', #'pce_data_level',
-      'is_interpolated', # 'is_used_for_aggregation',
-      'distribution_type',
+      'cpi', 'ppp', 'pop', 'gdp', 'hfce',
+      'is_interpolated', 'distribution_type',
       'estimation_type'
-      # 'gd_type', 'path',
-      # 'cache_id', 'survey_id', 'surveyid_year'
-      # 'wb_region_code', 'interpolation_id'
     )
 
   # Create list of available auxiliary data tables

diff --git a/R/fg_pip.R b/R/fg_pip.R
@@ -63,13 +63,13 @@ fg_pip <- function(country,
       # Compute estimated statistics using the fill_gap method
       if (debug) debugonce(wbpip:::prod_fg_compute_pip_stats)
       tmp_stats <- wbpip:::prod_fg_compute_pip_stats(
-        request_year = ctry_years[["reporting_year"]][ctry_year_id],
+        request_year = ctry_years[["year"]][ctry_year_id],
         data = svy_data,
         predicted_request_mean = tmp_metadata[["predicted_mean_ppp"]],
         svy_mean_lcu = tmp_metadata[["survey_mean_lcu"]],
         svy_median_lcu = tmp_metadata$survey_median_lcu,
         svy_median_ppp = tmp_metadata$survey_median_ppp,
-        survey_year = tmp_metadata[["survey_year"]],
+        survey_year = tmp_metadata[["welfare_time"]],
         default_ppp = tmp_metadata[["ppp"]],
         ppp = ppp,
         distribution_type = tmp_metadata[["distribution_type"]],
@@ -100,7 +100,7 @@ fg_pip <- function(country,
   out <- data.table::rbindlist(out)
 
   # Set collapse vars to NA (by type)
-  vars_to_collapse_real <- c("survey_year",
+  vars_to_collapse_real <- c("welfare_time",
                              "predicted_mean_ppp",
                              "survey_mean_lcu",
                              "survey_mean_ppp",

diff --git a/R/get_aux_table.R b/R/get_aux_table.R
@@ -12,6 +12,7 @@ get_aux_table <- function(data_dir, table) {
     data_dir,
     table
   ))
+  out <- rename_cols(out)
 
   return(out)
 }
diff --git a/R/pip.R b/R/pip.R
@@ -133,6 +133,11 @@ pip <- function(country = "all",
       out <- censor_rows(out, lkup[["censored"]], type = "regions")
     }
 
+    out <-
+      data.table::setnames(out,
+                           old = c('year', 'pop'),
+                           new = c('year', 'pop'))
+
     return(out)
   }
   # **** TO BE REMOVED **** REMOVAL ENDS HERE

diff --git a/R/pip_grp.R b/R/pip_grp.R
@@ -91,8 +91,8 @@ pip_grp <- function(country = "all",
   }
 
   out <- out[, c("region_code",
-                 "reporting_year",
-                 "reporting_pop",
+                 "year",
+                 "pop",
                  "poverty_line",
                  "headcount",
                  "poverty_gap",
@@ -109,42 +109,42 @@ pip_aggregate <- function(df) {
   # Handle simple aggregation
   df <- df[, .(
     region_code,
-    reporting_year,
+    year,
     poverty_line,
     mean,
     headcount,
     poverty_gap,
     poverty_severity,
     watts,
-    reporting_pop
+    pop
   )]
 
   # Compute population totals
   pop <- df[, lapply(.SD,
                      base::sum,
                      na.rm = TRUE),
-            by = .(reporting_year, poverty_line),
-            .SDcols = "reporting_pop"
+            by = .(year, poverty_line),
+            .SDcols = "pop"
   ]
 
   # Compute stats weighted average by groups
   cols <- c("headcount", "poverty_gap", "poverty_severity", "watts", "mean")
   df <- df[, lapply(.SD,
                       stats::weighted.mean,
-                      w = reporting_pop,
+                      w = pop,
                       na.rm = TRUE),
-             by = .(reporting_year, poverty_line),
+             by = .(year, poverty_line),
              .SDcols = cols
   ]
 
   # Combine results
-  df <- df[pop, on = .(reporting_year, poverty_line)]
+  df <- df[pop, on = .(year, poverty_line)]
 
   df$region_code <- "CUSTOM"
 
 
   # Compute population living in poverty
-  df <- df[, pop_in_poverty := round(headcount * reporting_pop, 0)]
+  df <- df[, pop_in_poverty := round(headcount * pop, 0)]
 
   return(df)
 }
@@ -160,27 +160,27 @@ pip_aggregate_by <- function(df, group_lkup) {
 
   df <- df[, .(
     region_code,
-    reporting_year,
+    year,
     poverty_line,
     mean,
     headcount,
     poverty_gap,
     poverty_severity,
     watts,
-    reporting_pop
+    pop
   )]
 
   cols <- c("headcount", "poverty_gap", "poverty_severity", "watts", "mean")
-  group_lkup <- group_lkup[, c("region_code", "reporting_year", "reporting_pop")]
+  group_lkup <- group_lkup[, c("region_code", "year", "pop")]
 
   # Compute stats weighted average by groups
-  rgn <- df[, lapply(.SD, stats::weighted.mean, w = reporting_pop, na.rm = TRUE),
-            by = .(region_code, reporting_year, poverty_line),
+  rgn <- df[, lapply(.SD, stats::weighted.mean, w = pop, na.rm = TRUE),
+            by = .(region_code, year, poverty_line),
             .SDcols = cols
   ]
 
   rgn <- group_lkup[rgn,
-                    on = .(region_code, reporting_year),
+                    on = .(region_code, year),
                     allow.cartesian = TRUE
   ]
 
@@ -192,7 +192,7 @@ pip_aggregate_by <- function(df, group_lkup) {
   out <- rbind(rgn, wld, fill = TRUE)
 
   # Compute population living in poverty
-  out <- out[, pop_in_poverty := round(headcount * reporting_pop, 0)]
+  out <- out[, pop_in_poverty := round(headcount * pop, 0)]
 
   return(out)
 }
@@ -202,17 +202,17 @@ compute_world_aggregates <- function(rgn, cols) {
   # Compute stats
   wld <- rgn[, lapply(.SD,
                       stats::weighted.mean,
-                      w = reporting_pop,
+                      w = pop,
                       na.rm = TRUE),
-             by = .(reporting_year, poverty_line),
+             by = .(year, poverty_line),
              .SDcols = cols
   ]
   # Compute yearly population WLD totals
-  tmp <- rgn[, .(reporting_pop = sum(reporting_pop)),
-             by = .(reporting_year)]
+  tmp <- rgn[, .(pop = sum(pop)),
+             by = .(year)]
 
 
-  wld <- wld[tmp, on = .(reporting_year = reporting_year)]
+  wld <- wld[tmp, on = .(year = year)]
   wld[["region_code"]] <- "WLD"
 
   return(wld)
@@ -230,7 +230,7 @@ filter_for_aggregate_by <- function(df) {
   # Otherwise, use whatever is available
 
   out <- df[, check := length(reporting_level),
-            by = c("country_code", "reporting_year", "poverty_line")]
+            by = c("country_code", "year", "poverty_line")]
   out <- out[out$check == 1 | (out$check > 1 & reporting_level == "national"), ]
 
   return(out)