diff --git a/README.md b/README.md index 40a0335..a9328c6 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ print(loc_table) │ --- ┆ --- ┆ --- ┆ --- │ │ str ┆ str ┆ str ┆ i64 │ ╞═══════════════╪════════════╪═════════════════════════════╪════════════╡ - │ US ┆ US ┆ United States ┆ 330759736 │ + │ US ┆ US ┆ United States ┆ 334735155 │ │ 01 ┆ AL ┆ Alabama ┆ 5024279 │ │ 02 ┆ AK ┆ Alaska ┆ 733391 │ │ 04 ┆ AZ ┆ Arizona ┆ 7151502 │ diff --git a/forecasttools/data.py b/forecasttools/data.py index 200e5c9..e4f802f 100644 --- a/forecasttools/data.py +++ b/forecasttools/data.py @@ -7,6 +7,7 @@ an example FluSight submission. """ +# %% import os import pathlib from urllib import error, request @@ -114,13 +115,8 @@ def merge_pop_data_and_loc_data( ) loc_df = pl.read_parquet(locations_path) # should have "long_name" merged_df = loc_df.join(pop_df, on="long_name", how="left") - # US total is not included by default; get US total - # us_states = make_united_states_dataset( - # file_save_path="united_states.parquet" - # ) - # us_population = merged_df.filter(pl.col("long_name").is_in(us_states))[ - # "population" - # ].sum() + # US total is not included by default; get US total from + # non-null territories & states us_population = merged_df["population"].sum() merged_df = merged_df.with_columns( pl.when(pl.col("long_name") == "United States") diff --git a/forecasttools/location_table.parquet b/forecasttools/location_table.parquet index b0b2364..0c42043 100644 Binary files a/forecasttools/location_table.parquet and b/forecasttools/location_table.parquet differ