Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename pip() resp columns #207

Open
wants to merge 10 commits into
base: DEV
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions R/add_agg_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ add_agg_stats <- function(df, except = c("ARG", "URY")) {
aggregated_list <- split(aggregated,
interaction(
aggregated$country_code,
aggregated$reporting_year
aggregated$year
),
drop = TRUE
)
Expand Down Expand Up @@ -54,7 +54,7 @@ ag_average_poverty_stats <- function(df) {
out[, na_cols] <- NA_real_

# Compute population weighted average
wgt_urban <- dfu$reporting_pop / sum(df$reporting_pop)
wgt_urban <- dfu$pop / sum(df$pop)
wgt_rural <- 1 - wgt_urban

# Weighted national mean
Expand Down Expand Up @@ -96,9 +96,9 @@ ag_average_poverty_stats <- function(df) {
}

# Update other variables
out$reporting_pop <- sum(df$reporting_pop)
out$pop <- sum(df$pop)
national_cols <- c("reporting_level", "gdp_data_level",
"pce_data_level", "cpi_data_level", "ppp_data_level")
"hfce_data_level", "cpi_data_level", "ppp_data_level")
out[, national_cols] <- "national"

return(out)
Expand Down
29 changes: 13 additions & 16 deletions R/create_lkups.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ create_lkups <- function(data_dir, versions) {
)
# TEMP cleaning - START
svy_lkup <- svy_lkup[svy_lkup$cache_id %in% paths_ids, ]
svy_lkup <- rename_cols(svy_lkup)
# TEMP cleaning - END
svy_lkup$path <- sprintf(
"%s/survey_data/%s.fst",
Expand All @@ -100,6 +101,7 @@ create_lkups <- function(data_dir, versions) {
)
# TEMP cleaning - START
ref_lkup <- ref_lkup[ref_lkup$cache_id %in% paths_ids, ]
ref_lkup <- rename_cols(ref_lkup)
# TEMP cleaning - END
ref_lkup$path <- sprintf(
"%s/survey_data/%s.fst",
Expand Down Expand Up @@ -127,7 +129,7 @@ create_lkups <- function(data_dir, versions) {
reporting_level = unique(tmp_metadata[["reporting_level"]])
paths <- unique(tmp_metadata$path)
ctry_years <- unique(tmp_metadata[, c(
"country_code", "reporting_year",
"country_code", "year",
"reporting_level", "interpolation_id"
)])

Expand All @@ -145,44 +147,39 @@ create_lkups <- function(data_dir, versions) {
# Load dist_stats
dist_stats <- fst::read_fst(sprintf("%s/estimations/dist_stats.fst", data_dir),
as.data.table = TRUE)
dist_stats <- rename_cols(dist_stats)

# Load pop_region
pop_region <- fst::read_fst(sprintf("%s/_aux/pop_region.fst", data_dir),
as.data.table = TRUE)
pop_region <- rename_cols(pop_region)

# Load country profiles lkups
cp_lkups <- readRDS(sprintf("%s/_aux/country_profiles.RDS", data_dir))
cp_lkups$key_indicators <- lapply(cp_lkups$key_indicators, rename_cols)
cp_lkups$charts <- lapply(cp_lkups$charts, rename_cols)

# Load poverty lines table
pl_lkup <- fst::read_fst(sprintf("%s/_aux/poverty_lines.fst", data_dir),
as.data.table = TRUE)

# Load list with censor tables
censored <- readRDS(sprintf("%s/_aux/censored.RDS", data_dir))
censored <- lapply(censored, rename_cols)

# Create pip return columns
pip_cols <-
c('region_code', 'country_code', 'reporting_year',
'reporting_level', 'survey_acronym', 'survey_coverage',
'survey_year', 'welfare_type', 'survey_comparability',
c('region_code', 'country_code', 'year',
'reporting_level','survey_acronym', 'survey_coverage',
'welfare_time', 'welfare_type', 'survey_comparability',
'comparable_spell', 'poverty_line',
'headcount', 'poverty_gap', 'poverty_severity', 'watts',
'mean', 'median', 'mld', 'gini', 'polarization',
'decile1', 'decile2', 'decile3', 'decile4', 'decile5',
'decile6', 'decile7', 'decile8', 'decile9', 'decile10',
# 'survey_mean_lcu', 'survey_mean_ppp', # Do we need these?
# 'predicted_mean_ppp', # Do we need this?
'cpi', #'cpi_data_level',
'ppp', #'ppp_data_level',
'reporting_pop', #'pop_data_level',
'reporting_gdp', #'gdp_data_level',
'reporting_pce', #'pce_data_level',
'is_interpolated', # 'is_used_for_aggregation',
'distribution_type',
'cpi', 'ppp', 'pop', 'gdp', 'hfce',
'is_interpolated', 'distribution_type',
'estimation_type'
# 'gd_type', 'path',
# 'cache_id', 'survey_id', 'surveyid_year'
# 'wb_region_code', 'interpolation_id'
)

# Create list of available auxiliary data tables
Expand Down
6 changes: 3 additions & 3 deletions R/fg_pip.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,13 @@ fg_pip <- function(country,
# Compute estimated statistics using the fill_gap method
if (debug) debugonce(wbpip:::prod_fg_compute_pip_stats)
tmp_stats <- wbpip:::prod_fg_compute_pip_stats(
request_year = ctry_years[["reporting_year"]][ctry_year_id],
request_year = ctry_years[["year"]][ctry_year_id],
data = svy_data,
predicted_request_mean = tmp_metadata[["predicted_mean_ppp"]],
svy_mean_lcu = tmp_metadata[["survey_mean_lcu"]],
svy_median_lcu = tmp_metadata$survey_median_lcu,
svy_median_ppp = tmp_metadata$survey_median_ppp,
survey_year = tmp_metadata[["survey_year"]],
survey_year = tmp_metadata[["welfare_time"]],
default_ppp = tmp_metadata[["ppp"]],
ppp = ppp,
distribution_type = tmp_metadata[["distribution_type"]],
Expand Down Expand Up @@ -100,7 +100,7 @@ fg_pip <- function(country,
out <- data.table::rbindlist(out)

# Set collapse vars to NA (by type)
vars_to_collapse_real <- c("survey_year",
vars_to_collapse_real <- c("welfare_time",
"predicted_mean_ppp",
"survey_mean_lcu",
"survey_mean_ppp",
Expand Down
1 change: 1 addition & 0 deletions R/get_aux_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ get_aux_table <- function(data_dir, table) {
data_dir,
table
))
out <- rename_cols(out)

return(out)
}
5 changes: 5 additions & 0 deletions R/pip.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ pip <- function(country = "all",
out <- censor_rows(out, lkup[["censored"]], type = "regions")
}

out <-
data.table::setnames(out,
old = c('year', 'pop'),
new = c('year', 'pop'))

return(out)
}
# **** TO BE REMOVED **** REMOVAL ENDS HERE
Expand Down
46 changes: 23 additions & 23 deletions R/pip_grp.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ pip_grp <- function(country = "all",
}

out <- out[, c("region_code",
"reporting_year",
"reporting_pop",
"year",
"pop",
"poverty_line",
"headcount",
"poverty_gap",
Expand All @@ -109,42 +109,42 @@ pip_aggregate <- function(df) {
# Handle simple aggregation
df <- df[, .(
region_code,
reporting_year,
year,
poverty_line,
mean,
headcount,
poverty_gap,
poverty_severity,
watts,
reporting_pop
pop
)]

# Compute population totals
pop <- df[, lapply(.SD,
base::sum,
na.rm = TRUE),
by = .(reporting_year, poverty_line),
.SDcols = "reporting_pop"
by = .(year, poverty_line),
.SDcols = "pop"
]

# Compute stats weighted average by groups
cols <- c("headcount", "poverty_gap", "poverty_severity", "watts", "mean")
df <- df[, lapply(.SD,
stats::weighted.mean,
w = reporting_pop,
w = pop,
na.rm = TRUE),
by = .(reporting_year, poverty_line),
by = .(year, poverty_line),
.SDcols = cols
]

# Combine results
df <- df[pop, on = .(reporting_year, poverty_line)]
df <- df[pop, on = .(year, poverty_line)]

df$region_code <- "CUSTOM"


# Compute population living in poverty
df <- df[, pop_in_poverty := round(headcount * reporting_pop, 0)]
df <- df[, pop_in_poverty := round(headcount * pop, 0)]

return(df)
}
Expand All @@ -160,27 +160,27 @@ pip_aggregate_by <- function(df, group_lkup) {

df <- df[, .(
region_code,
reporting_year,
year,
poverty_line,
mean,
headcount,
poverty_gap,
poverty_severity,
watts,
reporting_pop
pop
)]

cols <- c("headcount", "poverty_gap", "poverty_severity", "watts", "mean")
group_lkup <- group_lkup[, c("region_code", "reporting_year", "reporting_pop")]
group_lkup <- group_lkup[, c("region_code", "year", "pop")]

# Compute stats weighted average by groups
rgn <- df[, lapply(.SD, stats::weighted.mean, w = reporting_pop, na.rm = TRUE),
by = .(region_code, reporting_year, poverty_line),
rgn <- df[, lapply(.SD, stats::weighted.mean, w = pop, na.rm = TRUE),
by = .(region_code, year, poverty_line),
.SDcols = cols
]

rgn <- group_lkup[rgn,
on = .(region_code, reporting_year),
on = .(region_code, year),
allow.cartesian = TRUE
]

Expand All @@ -192,7 +192,7 @@ pip_aggregate_by <- function(df, group_lkup) {
out <- rbind(rgn, wld, fill = TRUE)

# Compute population living in poverty
out <- out[, pop_in_poverty := round(headcount * reporting_pop, 0)]
out <- out[, pop_in_poverty := round(headcount * pop, 0)]

return(out)
}
Expand All @@ -202,17 +202,17 @@ compute_world_aggregates <- function(rgn, cols) {
# Compute stats
wld <- rgn[, lapply(.SD,
stats::weighted.mean,
w = reporting_pop,
w = pop,
na.rm = TRUE),
by = .(reporting_year, poverty_line),
by = .(year, poverty_line),
.SDcols = cols
]
# Compute yearly population WLD totals
tmp <- rgn[, .(reporting_pop = sum(reporting_pop)),
by = .(reporting_year)]
tmp <- rgn[, .(pop = sum(pop)),
by = .(year)]


wld <- wld[tmp, on = .(reporting_year = reporting_year)]
wld <- wld[tmp, on = .(year = year)]
wld[["region_code"]] <- "WLD"

return(wld)
Expand All @@ -230,7 +230,7 @@ filter_for_aggregate_by <- function(df) {
# Otherwise, use whatever is available

out <- df[, check := length(reporting_level),
by = c("country_code", "reporting_year", "poverty_line")]
by = c("country_code", "year", "poverty_line")]
out <- out[out$check == 1 | (out$check > 1 & reporting_level == "national"), ]

return(out)
Expand Down
Loading