Skip to content

Commit

Permalink
update US population to use all non-null locations
Browse files Browse the repository at this point in the history
  • Loading branch information
AFg6K7h4fhy2 committed Feb 5, 2025
1 parent 6cb4cf9 commit d827559
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 8 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ print(loc_table)
│ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ str ┆ str ┆ i64 │
╞═══════════════╪════════════╪═════════════════════════════╪════════════╡
│ US ┆ US ┆ United States ┆ 330759736 │
│ US ┆ US ┆ United States ┆ 334735155 │
│ 01 ┆ AL ┆ Alabama ┆ 5024279 │
│ 02 ┆ AK ┆ Alaska ┆ 733391 │
│ 04 ┆ AZ ┆ Arizona ┆ 7151502 │
Expand Down
10 changes: 3 additions & 7 deletions forecasttools/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
an example FluSight submission.
"""

# %%
import os
import pathlib
from urllib import error, request
Expand Down Expand Up @@ -114,13 +115,8 @@ def merge_pop_data_and_loc_data(
)
loc_df = pl.read_parquet(locations_path) # should have "long_name"
merged_df = loc_df.join(pop_df, on="long_name", how="left")
# US total is not included by default; get US total
# us_states = make_united_states_dataset(
# file_save_path="united_states.parquet"
# )
# us_population = merged_df.filter(pl.col("long_name").is_in(us_states))[
# "population"
# ].sum()
# US total is not included by default; get US total from
# non-null territories & states
us_population = merged_df["population"].sum()
merged_df = merged_df.with_columns(
pl.when(pl.col("long_name") == "United States")
Expand Down
Binary file modified forecasttools/location_table.parquet
Binary file not shown.

0 comments on commit d827559

Please sign in to comment.