From f9e9cb9fae7f25bb13eefe4f184b9fad66435096 Mon Sep 17 00:00:00 2001 From: Trenton Bush Date: Wed, 2 Oct 2024 10:34:20 -0700 Subject: [PATCH 01/25] add comments to extract and some transform code --- src/dbcp/extract/eip_infrastructure.py | 8 ++++- src/dbcp/extract/epa_avert.py | 2 +- src/dbcp/extract/fips_tables.py | 9 ++--- src/dbcp/extract/lbnl_iso_queue.py | 4 +-- src/dbcp/extract/local_opposition.py | 7 +++- src/dbcp/extract/protected_area_by_county.py | 9 ++++- src/dbcp/extract/rmi_energy_communities.py | 5 ++- src/dbcp/transform/eip_infrastructure.py | 37 +++++++++++--------- src/dbcp/transform/fips_tables.py | 2 +- src/dbcp/transform/justice40.py | 2 +- 10 files changed, 54 insertions(+), 31 deletions(-) diff --git a/src/dbcp/extract/eip_infrastructure.py b/src/dbcp/extract/eip_infrastructure.py index 7efb3e7d..d0b6269b 100644 --- a/src/dbcp/extract/eip_infrastructure.py +++ b/src/dbcp/extract/eip_infrastructure.py @@ -1,4 +1,10 @@ -"""Retrieve data from EIP Infrastructure spreadsheets for analysis.""" +"""Retrieve data from EIP Infrastructure spreadsheets for analysis. + +This data was updated by contacting EIP directly for the latest version, but now they +host an Excel file at oilandgaswatch.org -> Resources -> Downloads, which points to: +https://drive.google.com/drive/folders/1udtw3XeezA5Lkb8Mfc_cntNTcV4oPuKi +Note that this new data version has changed structure from the one extracted below. 
+""" from pathlib import Path from typing import Dict diff --git a/src/dbcp/extract/epa_avert.py b/src/dbcp/extract/epa_avert.py index cc3e176d..166aff8e 100644 --- a/src/dbcp/extract/epa_avert.py +++ b/src/dbcp/extract/epa_avert.py @@ -1,4 +1,4 @@ -"""Retrieve data from EIP Infrastructure spreadsheets for analysis.""" +"""Retrieve data from EPA AVERT avoided carbon modeling.""" from pathlib import Path import pandas as pd diff --git a/src/dbcp/extract/fips_tables.py b/src/dbcp/extract/fips_tables.py index ed89f12a..f435ef77 100644 --- a/src/dbcp/extract/fips_tables.py +++ b/src/dbcp/extract/fips_tables.py @@ -1,4 +1,4 @@ -"""Extract canonical state and county FIPS tables from the addfips library.""" +"""Extract canonical state and county FIPS tables.""" from functools import lru_cache from importlib.resources import files from typing import Dict @@ -41,10 +41,7 @@ def extract_census_tribal_land(archive_uri: str) -> pd.DataFrame: def _extract_state_fips() -> pd.DataFrame: - """Extract canonical state and county FIPS tables from census data and the addfips library. - - Args: - vintage (int, optional): which Census year to use. Defaults to FIPS_CODE_VINTAGE. + """Extract canonical state FIPS tables from the addfips library. Returns: Dict[str, pd.DataFrame]: output dictionary of dataframes @@ -56,7 +53,7 @@ def _extract_state_fips() -> pd.DataFrame: def extract_fips(census_uri: str) -> Dict[str, pd.DataFrame]: - """Extract canonical state and county FIPS tables from census data and the addfips library. + """Extract canonical FIPS tables from census data and the addfips library. 
Returns: Dict[str, pd.DataFrame]: output dictionary of dataframes diff --git a/src/dbcp/extract/lbnl_iso_queue.py b/src/dbcp/extract/lbnl_iso_queue.py index d894ae69..9d5b2eda 100644 --- a/src/dbcp/extract/lbnl_iso_queue.py +++ b/src/dbcp/extract/lbnl_iso_queue.py @@ -1,4 +1,4 @@ -"""Retrieve data from the 20201 LBNL ISO Queue spreadsheet for analysis.""" +"""Retrieve data from the LBNL ISO Queue spreadsheet.""" from typing import Dict import pandas as pd @@ -10,7 +10,7 @@ def extract(uri: str) -> Dict[str, pd.DataFrame]: """Read Excel file with LBNL ISO Queue dataset. Args: - uri: uri of data in GCS relatives to the root. + uri: uri of data in GCS relative to the root. Returns: dfs: dictionary of dataframe name to raw dataframe. diff --git a/src/dbcp/extract/local_opposition.py b/src/dbcp/extract/local_opposition.py index 90c67aff..446544cf 100644 --- a/src/dbcp/extract/local_opposition.py +++ b/src/dbcp/extract/local_opposition.py @@ -1,4 +1,9 @@ -"""Extraction logic for Columbia Local Opposition dataset.""" +"""Extraction logic for Columbia Local Opposition dataset. + +This dataset is a .docx file with a hierarchical structure. The hierarchy is denoted by +formatting details (paragraph level, font, etc), but is surprisingly consistent. It is +infrequently updated by a research group at Columbia University. +""" from pathlib import Path from typing import Dict, List, Optional diff --git a/src/dbcp/extract/protected_area_by_county.py b/src/dbcp/extract/protected_area_by_county.py index 9cfdb8ef..358ba0cf 100644 --- a/src/dbcp/extract/protected_area_by_county.py +++ b/src/dbcp/extract/protected_area_by_county.py @@ -1,4 +1,11 @@ -"""Extract data from USGS PAD-US intersected with TIGER county shapefiles.""" +"""Extract data from USGS PAD-US intersected with TIGER county shapefiles. + +This data is derived from the Protected Areas Database of the United States (PAD-US) and +intersected with TIGER county shapefiles. 
It was prototyped in a notebook but was never +moved into a standalone module. The data loaded here is created in +notebooks/23-tpb-check_federal_lands.ipynb. Ideally this data would be re-created in +a module and loaded here, with a disk cache if necessary for performance. +""" from pathlib import Path import pandas as pd diff --git a/src/dbcp/extract/rmi_energy_communities.py b/src/dbcp/extract/rmi_energy_communities.py index ad15b2d9..d4d7747b 100644 --- a/src/dbcp/extract/rmi_energy_communities.py +++ b/src/dbcp/extract/rmi_energy_communities.py @@ -1,4 +1,7 @@ -"""Extract data from RMI's energy communities analysis.""" +"""Extract data from RMI/Catalyst energy communities analysis. + +Source repo: https://github.com/catalyst-cooperative/rmi-energy-communities +""" from pathlib import Path import pandas as pd diff --git a/src/dbcp/transform/eip_infrastructure.py b/src/dbcp/transform/eip_infrastructure.py index 10897204..18be4e07 100644 --- a/src/dbcp/transform/eip_infrastructure.py +++ b/src/dbcp/transform/eip_infrastructure.py @@ -38,11 +38,11 @@ def _format_column_names(cols: Sequence[str]) -> List[str]: def _fix_erroneous_array_items(ser: pd.Series, split_on=",", regex=False) -> pd.Series: - """Split on commas, preserve only the first value, and cast to numeric. + """Split on a delimiter and preserve only the first value. Several columns in EIP data should be numeric types but a small number of erroneous - values forces them to object dtype. The erroneous pattern is for the number to simply - be duplicated as a CSV string. For example, 0.2 appears as '0.2, 0.2'. + values forces them to object dtype. The erroneous pattern is for the value to be + duplicated as a CSV string. For example, 0.2 appears as '0.2, 0.2'. 
Args: ser (pd.Series): values to fix @@ -120,9 +120,8 @@ def facilities_transform(raw_fac_df: pd.DataFrame) -> pd.DataFrame: "raw_wastewater_discharge_indicator", ] for col in should_be_numeric: - if not pd.api.types.is_numeric_dtype(fac[col]): - new = _fix_erroneous_array_items(fac[col]) - fac[col] = pd.to_numeric(new, errors="raise") + new = _fix_erroneous_array_items(fac[col]) + fac[col] = pd.to_numeric(new, errors="raise") fac.loc[:, "is_ccs"] = _convert_string_to_boolean(fac.loc[:, "raw_is_ccs"]) @@ -161,8 +160,8 @@ def facilities_transform(raw_fac_df: pd.DataFrame) -> pd.DataFrame: ) duplicative_columns = [ # these are raw names - # These columns are just a concatenation of the names and IDs corresponding to the ID columns - # They add no information and invite inconsistency + # These columns are just a concatenation of the names and IDs corresponding to + # the ID columns. They add no information and invite inconsistency "Company", "Project", "Associated Facilities", @@ -236,10 +235,9 @@ def projects_transform(raw_proj_df: pd.DataFrame) -> pd.DataFrame: ] for col in should_be_numeric: # these columns suffer from occasional duplicate values as CSV for some reason. - # Like "1.0, 1.0". The second number is never different. - if not pd.api.types.is_numeric_dtype(proj[col]): - new = _fix_erroneous_array_items(proj[col]) - proj[col] = pd.to_numeric(new, errors="raise") + # Like "1.0, 1.0". The second number is never different. [validate this?] + new = _fix_erroneous_array_items(proj[col]) + proj[col] = pd.to_numeric(new, errors="raise") proj.loc[:, "is_ccs"] = _convert_string_to_boolean(proj.loc[:, "raw_is_ccs"]) proj.loc[:, "is_ally_target"] = _convert_string_to_boolean( @@ -248,10 +246,17 @@ def projects_transform(raw_proj_df: pd.DataFrame) -> pd.DataFrame: # manual correction for project with 92 Billion dollar cost (lol). 
Googled it and # it was supposed to be 9.2 Billion - proj.loc[ - proj["name"].eq("Gron Fuels' Renewable Fuels Plant - Initial Construction"), - "cost_millions", - ] *= 0.1 + if ( + proj.loc[ + proj["name"].eq("Gron Fuels' Renewable Fuels Plant - Initial Construction"), + "cost_millions", + ] + >= 9000 # it's over 9000! + ): + proj.loc[ + proj["name"].eq("Gron Fuels' Renewable Fuels Plant - Initial Construction"), + "cost_millions", + ] *= 0.1 # manual fix. One project's facility id doesn't exist. The project is the Oil part # of the willow Project. The next project ID belongs to the gas part, and its # facility ID does exist. So I assign the oil facility ID to the gas facility ID. diff --git a/src/dbcp/transform/fips_tables.py b/src/dbcp/transform/fips_tables.py index 7dbf3918..de35319d 100644 --- a/src/dbcp/transform/fips_tables.py +++ b/src/dbcp/transform/fips_tables.py @@ -36,7 +36,7 @@ def _add_tribal_land_frac( dissolved_tribal = tribal_land.dissolve() dissolved_tribal_geometry = dissolved_tribal.geometry.iloc[0] - # Calculate intersection, convert to km + # Calculate intersection, convert m^2 to km^2 counties["tribal_land_intersection"] = ( counties.intersection(dissolved_tribal_geometry).area / 1e6 ) diff --git a/src/dbcp/transform/justice40.py b/src/dbcp/transform/justice40.py index 7a236576..2cf313b7 100644 --- a/src/dbcp/transform/justice40.py +++ b/src/dbcp/transform/justice40.py @@ -156,7 +156,7 @@ def transform(raw_j40: dict[str, pd.DataFrame]) -> dict[str, pd.DataFrame]: out_df.drop(columns="", inplace=True) out_df.loc[:, "tract_id_fips"] = _fips_int_to_string(out_df.loc[:, "tract_id_fips"]) - # Correct percents + # Correct percentage errors and convert to fractions percent_cols = list(filter(lambda col: col.endswith("_percent"), list(out_df))) for col in percent_cols: col_max = out_df[col].max() From 3307e356bfbef3a6f8c5dfef6cc41876b1c61052 Mon Sep 17 00:00:00 2001 From: Trenton Bush Date: Mon, 7 Oct 2024 15:53:40 -0700 Subject: [PATCH 02/25] bugfix 
pd.Series comparison --- src/dbcp/transform/eip_infrastructure.py | 30 +++++++++++++++--------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/dbcp/transform/eip_infrastructure.py b/src/dbcp/transform/eip_infrastructure.py index 18be4e07..b525406c 100644 --- a/src/dbcp/transform/eip_infrastructure.py +++ b/src/dbcp/transform/eip_infrastructure.py @@ -246,17 +246,13 @@ def projects_transform(raw_proj_df: pd.DataFrame) -> pd.DataFrame: # manual correction for project with 92 Billion dollar cost (lol). Googled it and # it was supposed to be 9.2 Billion - if ( - proj.loc[ - proj["name"].eq("Gron Fuels' Renewable Fuels Plant - Initial Construction"), - "cost_millions", - ] - >= 9000 # it's over 9000! - ): - proj.loc[ - proj["name"].eq("Gron Fuels' Renewable Fuels Plant - Initial Construction"), - "cost_millions", - ] *= 0.1 + to_correct = proj.loc[ + proj["name"].eq("Gron Fuels' Renewable Fuels Plant - Initial Construction"), + "cost_millions", + ] + assert len(to_correct) == 1, "Expected one project to correct." + assert to_correct.ge(9000).all(), "Expected erroneous cost over 9 billion." + to_correct *= 0.1 # manual fix. One project's facility id doesn't exist. The project is the Oil part # of the willow Project. The next project ID belongs to the gas part, and its # facility ID does exist. So I assign the oil facility ID to the gas facility ID. 
@@ -447,3 +443,15 @@ def transform(raw_eip_dfs: Dict[str, pd.DataFrame]) -> Dict[str, pd.DataFrame]: } return out + + +if __name__ == "__main__": + # debugging entry point + from pathlib import Path + + from dbcp.extract.eip_infrastructure import extract + + source_path = Path("/app/data/raw/2023.05.24 OGW database.xlsx") + eip_raw_dfs = extract(source_path) + eip_transformed_dfs = transform(eip_raw_dfs) + print("yay") From 5d2b3cf9701322d191a75aa9f9e4b18826b9ab60 Mon Sep 17 00:00:00 2001 From: Dazhong Xia Date: Fri, 11 Oct 2024 14:48:29 -0400 Subject: [PATCH 03/25] Remove a deleted column from docs --- docs/data-mart/counties_long_format.md | 1 - docs/data-mart/counties_wide_format.md | 1 - 2 files changed, 2 deletions(-) diff --git a/docs/data-mart/counties_long_format.md b/docs/data-mart/counties_long_format.md index f2a2dba4..e516f5cc 100644 --- a/docs/data-mart/counties_long_format.md +++ b/docs/data-mart/counties_long_format.md @@ -61,7 +61,6 @@ This table is mostly a restructured version of counties_wide_format. 
It provides ||`n_tracts_local_to_area_income_ratio_less_than_high_school_islan`|Number of tracts with low ratios of local to regional income and low high school graduation rates (island territories only)|Justice40|| ||`n_tracts_pm2_5_low_income`|Number of tracts with high particulate matter pollution and low income (excepting students)|Justice40|| ||`n_tracts_population_loss_low_income`|Number of tracts with high predicted climate-driven population loss and low income (excepting students)|Justice40|| -||`n_tracts_risk_management_plan_proximity_low_income`|Number of tracts with close proximity to RMP sites and low income (excepting students)|Justice40|| ||`n_tracts_superfund_proximity_low_income`|Number of tracts with close proximity to superfund sites and low income (excepting students)|Justice40|| ||`n_tracts_traffic_low_income`|Number of tracts with high traffic exposure and low income (excepting students)|Justice40|| ||`n_tracts_unemployment_and_low_high_school`|Number of tracts with high unemployment and low high school graduation rates|Justice40|| diff --git a/docs/data-mart/counties_wide_format.md b/docs/data-mart/counties_wide_format.md index 19203974..6b5841b8 100644 --- a/docs/data-mart/counties_wide_format.md +++ b/docs/data-mart/counties_wide_format.md @@ -163,7 +163,6 @@ Fossil generation aggregates include coal, oil, and gas power plants. 
||`n_tracts_local_to_area_income_ratio_less_than_high_school_islan`|Number of tracts with low ratios of local to regional income and low high school graduation rates (island territories only)|Justice40|| ||`n_tracts_pm2_5_low_income`|Number of tracts with high particulate matter pollution and low income (excepting students)|Justice40|| ||`n_tracts_population_loss_low_income`|Number of tracts with high predicted climate-driven population loss and low income (excepting students)|Justice40|| -||`n_tracts_risk_management_plan_proximity_low_income`|Number of tracts with close proximity to RMP sites and low income (excepting students)|Justice40|| ||`n_tracts_superfund_proximity_low_income`|Number of tracts with close proximity to superfund sites and low income (excepting students)|Justice40|| ||`n_tracts_traffic_low_income`|Number of tracts with high traffic exposure and low income (excepting students)|Justice40|| ||`n_tracts_unemployment_and_low_high_school`|Number of tracts with high unemployment and low high school graduation rates|Justice40|| From 3f621d826044266f5f131c17a237122dc8f2df08 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Sun, 10 Nov 2024 21:54:01 -0800 Subject: [PATCH 04/25] First pass at updating GS Q3 2024 --- src/dbcp/__init__.py | 1 + src/dbcp/data_mart/projects.py | 6 +++--- src/dbcp/extract/gridstatus_isoqueues.py | 15 ++++++++------- src/dbcp/transform/gridstatus.py | 15 +++++++++++---- src/dbcp/validation/tests.py | 11 +++++++---- 5 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/dbcp/__init__.py b/src/dbcp/__init__.py index 612d9bce..65aefddf 100644 --- a/src/dbcp/__init__.py +++ b/src/dbcp/__init__.py @@ -3,6 +3,7 @@ import dbcp.data_mart # noqa: F401 import dbcp.data_mart.br_election_data # noqa: F401 import dbcp.data_mart.counties # noqa: F401 +import dbcp.data_mart.county_concrete_mw # noqa: F401 import dbcp.data_mart.projects # noqa: F401 import dbcp.etl # noqa: F401 import dbcp.extract # noqa: F401 diff --git 
a/src/dbcp/data_mart/projects.py b/src/dbcp/data_mart/projects.py index 4b55d028..19a14a51 100644 --- a/src/dbcp/data_mart/projects.py +++ b/src/dbcp/data_mart/projects.py @@ -781,7 +781,7 @@ def create_project_change_log(long_format: pd.DataFrame) -> pd.DataFrame: f"{pct_after_current_year:.2%} of operational projects have actual_completion_date after the current year." ) # make sure pct_after_current_year is less than 0.001 of operational projects - expected_missing = 0.001 + expected_missing = 0.002 assert ( pct_after_current_year < expected_missing ), f"More than {expected_missing}% of operational projects have actual_completion_date after the current year." @@ -909,7 +909,7 @@ def validate_project_change_log( "MISO": 0.09, # A lot of operational projects prior to 2010 are missing operational dates "NYISO": 0.18, # A lot of withdrawn projects from the early 2000s are missing withdrawn and operational dates "PJM": 0.04, - "SPP": 0.31, # A lot of withdrawn projects from the early 2000s are missing withdrawn and operational dates + "SPP": 0.32, # A lot of withdrawn projects from the early 2000s are missing withdrawn and operational dates } ) @@ -964,7 +964,7 @@ def validate_iso_regions_change_log( "MISO": 0.09, # A lot of operational projects prior to 2010 are missing operational dates "NYISO": 0.20, # A lot of withdrawn projects from the early 2000s are missing withdrawn and operational dates "PJM": 0.04, - "SPP": 0.31, # A lot of withdrawn projects from the early 2000s are missing withdrawn and operational dates + "SPP": 0.32, # A lot of withdrawn projects from the early 2000s are missing withdrawn and operational dates } ) diff --git a/src/dbcp/extract/gridstatus_isoqueues.py b/src/dbcp/extract/gridstatus_isoqueues.py index 6b63106f..655b917f 100644 --- a/src/dbcp/extract/gridstatus_isoqueues.py +++ b/src/dbcp/extract/gridstatus_isoqueues.py @@ -8,6 +8,7 @@ data. The version numbers are automatically generated by Google Cloud Storage Object Versioning. 
""" + import logging import pandas as pd @@ -17,14 +18,14 @@ logger = logging.getLogger(__name__) ISO_QUEUE_VERSIONS: dict[str, str] = { - "miso": "1719774997006069", + "miso": "1728242350923420", "miso-pre-2017": "1709776311574737", - "caiso": "1719774997530790", - "pjm": "1719774998059470", - "ercot": "1719774998544416", - "spp": "1719774998998901", - "nyiso": "1719774999497797", - "isone": "1719774999940225", + "caiso": "1728242351254356", + "pjm": "1728242351606642", + "ercot": "1728242351929200", + "spp": "1728242352244156", + "nyiso": "1728242352584485", + "isone": "1728242352913470", } diff --git a/src/dbcp/transform/gridstatus.py b/src/dbcp/transform/gridstatus.py index bfbdc9a7..5d2c1c95 100644 --- a/src/dbcp/transform/gridstatus.py +++ b/src/dbcp/transform/gridstatus.py @@ -478,7 +478,7 @@ def _clean_resource_type( resource_locations["county_id_fips"].isin(coastal_county_id_fips.keys()) & resource_locations.resource_clean.eq("Onshore Wind") ].project_id - expected_n_coastal_wind_projects = 92 + expected_n_coastal_wind_projects = 85 assert ( len(nyiso_coastal_wind_project_project_ids) == expected_n_coastal_wind_projects ), f"Expected {expected_n_coastal_wind_projects} NYISO coastal wind projects but found {len(nyiso_coastal_wind_project_project_ids)}" @@ -622,9 +622,13 @@ def _transform_miso(post_2017: pd.DataFrame, pre_2017: pd.DataFrame) -> pd.DataF in_service_projects = iso_df[ iso_df["Post Generator Interconnection Agreement Status"].eq("In Service") ] + done_in_service_projects = in_service_projects[ + in_service_projects["queue_status"].ne("Done") + ] + expected_n_done_in_service_projects = 2 assert ( - len(in_service_projects[in_service_projects["queue_status"].ne("Done")]) <= 1 - ), "There is an unexpected number of MISO projects that are In Service but not Done." 
+ len(done_in_service_projects) <= expected_n_done_in_service_projects + ), f"Expected {expected_n_done_in_service_projects} MISO projects that are In Service but not Done but found {len(done_in_service_projects)}." # Mark "Done" projects as "Active" because they are not necesarily operational yet. iso_df["queue_status"] = iso_df["queue_status"].map( @@ -709,6 +713,9 @@ def _transform_pjm(iso_df: pd.DataFrame) -> pd.DataFrame: } iso_df["queue_status"] = iso_df["queue_status"].map(status_map) + # There is one project that is missing a queue status. Assume it is withdrawn + iso_df.loc[iso_df.queue_id.eq("AC1-073"), "queue_status"] = "Withdrawn" + iso_df = _create_project_status_classification_from_multiple_columns( iso_df, facilities_study_status_col="Facilities Study Status", @@ -925,7 +932,7 @@ def _normalize_project_locations(iso_df: pd.DataFrame) -> pd.DataFrame: geocoded_locations[["county_id_fips", "project_id"]].duplicated(keep=False) ] assert ( - len(duplicate_locations) <= 108 + len(duplicate_locations) <= 114 ), f"Found more duplicate locations in Grid Status location table than expected:\n {duplicate_locations}" return geocoded_locations diff --git a/src/dbcp/validation/tests.py b/src/dbcp/validation/tests.py index 04c62acd..7b97838a 100644 --- a/src/dbcp/validation/tests.py +++ b/src/dbcp/validation/tests.py @@ -92,7 +92,9 @@ def test_iso_projects_sources(engine: Engine): expected_source = {"proprietary"} offshore_test = pd.read_sql( proprietary_offshore, engine, index_col="source" - ).squeeze(axis=1) # make series + ).squeeze( + axis=1 + ) # make series actual_source = set(offshore_test.index) assert ( actual_source == expected_source @@ -241,9 +243,10 @@ def test_county_wide_coverage(engine: Engine): df.shape[0] == n_counties ), "counties_wide_format does not contain all counties" notnull = df.notnull() - assert notnull.any(axis=1).sum() == 2461, ( - "counties_wide_format has unexpected county coverage:" - f" {notnull.loc[notnull.any(axis=1), 
'county_id_fips']}" + n_expected_counties = 2458 + assert notnull.any(axis=1).sum() == n_expected_counties, ( + "counties_wide_format has unexpected county coverage." + f" Expected {n_expected_counties}, found {notnull.any(axis=1).sum()}" ) From 351658c63f58c84750ede681c8f4dd4e50085245 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Sun, 10 Nov 2024 22:38:36 -0800 Subject: [PATCH 05/25] Convert br election data query to pandas --- src/dbcp/data_mart/br_election_data.py | 23 +++++++++++------------ src/dbcp/data_mart/counties.py | 1 + 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/dbcp/data_mart/br_election_data.py b/src/dbcp/data_mart/br_election_data.py index b95d7261..23d15563 100644 --- a/src/dbcp/data_mart/br_election_data.py +++ b/src/dbcp/data_mart/br_election_data.py @@ -1,4 +1,5 @@ """Module to create a denormalized table for the Ballot Ready Election Data.""" + from typing import Optional import pandas as pd @@ -9,22 +10,20 @@ def _create_br_election_data_mart(engine: sa.engine.Engine) -> pd.DataFrame: """Denormalize the ballot ready entities.""" - pos_county_query = """ - SELECT - cfips.county_name, - sfips.state_name, - br.* - FROM data_warehouse.br_positions_counties_assoc as br - LEFT JOIN data_warehouse.county_fips as cfips - USING (county_id_fips) - LEFT JOIN data_warehouse.state_fips as sfips - ON sfips.state_id_fips = br.state_id_fips - """ with engine.connect() as con: br_races = pd.read_sql_table("br_races", con, schema="data_warehouse") br_elections = pd.read_sql_table("br_elections", con, schema="data_warehouse") br_positions = pd.read_sql_table("br_positions", con, schema="data_warehouse") - br_positions_counties_assoc = pd.read_sql(pos_county_query, con) + br_positions_counties_assoc = pd.read_sql_table( + "br_positions_counties_assoc", con, schema="data_warehouse" + ) + county_fips = pd.read_sql_table("county_fips", con, schema="data_warehouse") + state_fips = pd.read_sql_table("state_fips", con, schema="data_warehouse") 
+ + # Add state and county names + br_positions_counties_assoc = br_positions_counties_assoc.merge( + county_fips[["county_id_fips", "county_name"]], how="left", on="county_id_fips" + ).merge(state_fips[["state_id_fips", "state_name"]], how="left", on="state_id_fips") br_election_data = br_races.merge( br_elections, how="left", on="election_id", validate="m:1" diff --git a/src/dbcp/data_mart/counties.py b/src/dbcp/data_mart/counties.py index 95aa2fe5..6713794a 100644 --- a/src/dbcp/data_mart/counties.py +++ b/src/dbcp/data_mart/counties.py @@ -19,6 +19,7 @@ `count(distinct my_column)) group by county, resource` cannot have another `count(distinct my_column) group by county` on top of it. """ + from io import StringIO from typing import Dict, Optional From 5d6ceb46b3bd9b66809e7e464e9e10040eb3af9c Mon Sep 17 00:00:00 2001 From: bendnorman Date: Sun, 10 Nov 2024 22:50:32 -0800 Subject: [PATCH 06/25] Convert a couple other sql queries to pandas --- src/dbcp/data_mart/counties.py | 19 ++++++++-------- src/dbcp/data_mart/county_concrete_mw.py | 29 +++++++++++++----------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/src/dbcp/data_mart/counties.py b/src/dbcp/data_mart/counties.py index 6713794a..656cef0b 100644 --- a/src/dbcp/data_mart/counties.py +++ b/src/dbcp/data_mart/counties.py @@ -967,16 +967,15 @@ def _add_avoided_co2e(iso: pd.DataFrame, engine: sa.engine.Engine) -> pd.DataFra def _get_avoided_emissions_by_county_resource(engine: sa.engine.Engine) -> pd.DataFrame: - query = """ - select - avert_region, - resource_type, - co2e_tonnes_per_year_per_mw - from data_warehouse.avert_avoided_emissions_factors - -- drop distributed_pv - WHERE resource_type in ('onshore_wind', 'offshore_wind', 'utility_pv') - """ - emiss_fac = pd.read_sql(query, engine) + emiss_fac = pd.read_sql_table( + "avert_avoided_emissions_factors", engine, schema="data_warehouse" + ) + emiss_fac = emiss_fac[ + emiss_fac.resource_type.isin(["onshore_wind", "offshore_wind", 
"utility_pv"]) + ] + emiss_fac = emiss_fac[ + ["avert_region", "resource_type", "co2e_tonnes_per_year_per_mw"] + ] crosswalk = pd.read_sql_table( "avert_county_region_assoc", engine, schema="data_warehouse" ) diff --git a/src/dbcp/data_mart/county_concrete_mw.py b/src/dbcp/data_mart/county_concrete_mw.py index 8650fe58..e29b65c0 100644 --- a/src/dbcp/data_mart/county_concrete_mw.py +++ b/src/dbcp/data_mart/county_concrete_mw.py @@ -1,4 +1,5 @@ """Create county-level aggregates of proposed projects from EIA860m and ACP data.""" + from typing import Optional import pandas as pd @@ -57,19 +58,21 @@ def _get_concrete_aggs(engine: sa.engine.Engine) -> pd.DataFrame: inplace=True, ) - acp_query = """ - SELECT - plant_id_eia, - NULL as generator_id, - capacity_mw, - lower(resource) AS resource_clean, - status, - county_id_fips, - iso_region - from private_data_warehouse.acp_projects as acp - where status in ('Advanced Development', 'Under Construction') - """ - acp = pd.read_sql(acp_query, engine) + acp = pd.read_sql_table("acp_projects", engine, schema="private_data_warehouse") + acp = acp[ + [ + "plant_id_eia", + "capacity_mw", + "resource", + "status", + "county_id_fips", + "iso_region", + ] + ] + acp["resource_clean"] = acp["resource"].str.lower() + acp["generator_id"] = pd.NA + acp = acp[acp.status.isin(["Advanced Development", "Under Construction"])] + # remove overlapping projects from ACP (prioritize 860m) is_overlap = acp["plant_id_eia"].isin(eia860m["plant_id_eia"]) acp = acp.loc[~is_overlap, :] From 7eee047da0bc87737bc8c25b4e972345b96ca0ec Mon Sep 17 00:00:00 2001 From: bendnorman Date: Sun, 10 Nov 2024 23:00:40 -0800 Subject: [PATCH 07/25] Convert justice mart from sql to pandas --- src/dbcp/data_mart/counties.py | 106 +++++++++++++++++++++++---------- 1 file changed, 74 insertions(+), 32 deletions(-) diff --git a/src/dbcp/data_mart/counties.py b/src/dbcp/data_mart/counties.py index 656cef0b..e921707b 100644 --- a/src/dbcp/data_mart/counties.py +++ 
b/src/dbcp/data_mart/counties.py @@ -135,38 +135,80 @@ def _create_dbcp_ej_index(j40_df: pd.DataFrame) -> pd.Series: def _get_env_justice_df(engine: sa.engine.Engine) -> pd.DataFrame: """Create county-level aggregates of Justice40 tracts.""" - query = """ - SELECT - SUBSTRING("tract_id_fips", 1, 5) as county_id_fips, - COUNT("tract_id_fips") as total_tracts, - SUM("is_disadvantaged"::INTEGER) as n_distinct_qualifying_tracts, - SUM("expected_agriculture_loss_rate_is_low_income"::INTEGER) as n_tracts_agriculture_loss_low_income, - SUM("expected_building_loss_rate_is_low_income"::INTEGER) as n_tracts_building_loss_low_income, - SUM("expected_population_loss_rate_is_low_income"::INTEGER) as n_tracts_population_loss_low_income, - SUM("diesel_particulates_is_low_income"::INTEGER) as n_tracts_diesel_particulates_low_income, - SUM("energy_burden_is_low_income"::INTEGER) as n_tracts_energy_burden_low_income, - SUM("pm2_5_is_low_income"::INTEGER) as n_tracts_pm2_5_low_income, - SUM("traffic_proximity_is_low_income"::INTEGER) as n_tracts_traffic_low_income, - SUM("lead_paint_and_median_house_value_is_low_income"::INTEGER) as n_tracts_lead_paint_and_median_home_price_low_income, - SUM("housing_burden_is_low_income"::INTEGER) as n_tracts_housing_burden_low_income, - SUM("proximity_to_superfund_sites_is_low_income"::INTEGER) as n_tracts_superfund_proximity_low_income, - SUM("wastewater_discharge_is_low_income"::INTEGER) as n_tracts_wastewater_low_income, - SUM("asthma_is_low_income"::INTEGER) as n_tracts_asthma_low_income, - SUM("heart_disease_is_low_income"::INTEGER) as n_tracts_heart_disease_low_income, - SUM("diabetes_is_low_income"::INTEGER) as n_tracts_diabetes_low_income, - SUM("low_median_household_income_and_low_hs_attainment"::INTEGER) as n_tracts_local_to_area_income_ratio_and_low_high_school, - SUM("households_in_linguistic_isolation_and_low_hs_attainment"::INTEGER) as n_tracts_linguistic_isolation_and_low_high_school, - 
SUM("households_below_federal_poverty_level_low_hs_attainment"::INTEGER) as n_tracts_below_poverty_and_low_high_school, - SUM("unemployment_and_low_hs_attainment"::INTEGER) as n_tracts_unemployment_and_low_high_school, - SUM("proximity_to_hazardous_waste_facilities_is_low_income"::INTEGER) as n_tracts_hazardous_waste_proximity_low_income, - SUM("unemployment_and_low_hs_edu_islands"::INTEGER) as n_tracts_unemployment_less_than_high_school_islands, - SUM("low_median_household_income_and_low_hs_edu_islands"::INTEGER) as n_tracts_local_to_area_income_ratio_less_than_high_school_islands, - SUM("households_below_federal_poverty_level_low_hs_edu_islands"::INTEGER) as n_tracts_below_poverty_line_less_than_high_school_islands, - SUM("low_life_expectancy_is_low_income"::INTEGER) as n_tracts_life_expectancy_low_income - FROM "data_warehouse"."justice40_tracts" - GROUP BY 1; - """ - df = pd.read_sql(query, engine) + df = pd.read_sql_table("justice40_tracts", engine, schema="data_warehouse") + df["county_id_fips"] = df["tract_id_fips"].str.slice(0, 5) + df.groupby("county_id_fips").agg( + total_tracts=("tract_id_fips", "count"), + n_distinct_qualifying_tracts=("is_disadvantaged", "sum"), + n_tracts_agriculture_loss_low_income=( + "expected_agriculture_loss_rate_is_low_income", + "sum", + ), + n_tracts_building_loss_low_income=( + "expected_building_loss_rate_is_low_income", + "sum", + ), + n_tracts_population_loss_low_income=( + "expected_population_loss_rate_is_low_income", + "sum", + ), + n_tracts_diesel_particulates_low_income=( + "diesel_particulates_is_low_income", + "sum", + ), + n_tracts_energy_burden_low_income=("energy_burden_is_low_income", "sum"), + n_tracts_pm2_5_low_income=("pm2_5_is_low_income", "sum"), + n_tracts_traffic_low_income=("traffic_proximity_is_low_income", "sum"), + n_tracts_lead_paint_and_median_home_price_low_income=( + "lead_paint_and_median_house_value_is_low_income", + "sum", + ), + 
n_tracts_housing_burden_low_income=("housing_burden_is_low_income", "sum"), + n_tracts_superfund_proximity_low_income=( + "proximity_to_superfund_sites_is_low_income", + "sum", + ), + n_tracts_wastewater_low_income=("wastewater_discharge_is_low_income", "sum"), + n_tracts_asthma_low_income=("asthma_is_low_income", "sum"), + n_tracts_heart_disease_low_income=("heart_disease_is_low_income", "sum"), + n_tracts_diabetes_low_income=("diabetes_is_low_income", "sum"), + n_tracts_local_to_area_income_ratio_and_low_high_school=( + "low_median_household_income_and_low_hs_attainment", + "sum", + ), + n_tracts_linguistic_isolation_and_low_high_school=( + "households_in_linguistic_isolation_and_low_hs_attainment", + "sum", + ), + n_tracts_below_poverty_and_low_high_school=( + "households_below_federal_poverty_level_low_hs_attainment", + "sum", + ), + n_tracts_unemployment_and_low_high_school=( + "unemployment_and_low_hs_attainment", + "sum", + ), + n_tracts_hazardous_waste_proximity_low_income=( + "proximity_to_hazardous_waste_facilities_is_low_income", + "sum", + ), + n_tracts_unemployment_less_than_high_school_islands=( + "unemployment_and_low_hs_edu_islands", + "sum", + ), + n_tracts_local_to_area_income_ratio_less_than_high_school_islands=( + "low_median_household_income_and_low_hs_edu_islands", + "sum", + ), + n_tracts_below_poverty_line_less_than_high_school_islands=( + "households_below_federal_poverty_level_low_hs_edu_islands", + "sum", + ), + n_tracts_life_expectancy_low_income=( + "low_life_expectancy_is_low_income", + "sum", + ), + ) df["justice40_dbcp_index"] = _create_dbcp_ej_index(df) return df From 2b4b5a7f67980c5f01e1193679761cd206d231d3 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Sun, 10 Nov 2024 23:10:16 -0800 Subject: [PATCH 08/25] Convert a couple more from sql to pandas --- src/dbcp/data_mart/counties.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dbcp/data_mart/counties.py b/src/dbcp/data_mart/counties.py index 
e921707b..3890cad4 100644 --- a/src/dbcp/data_mart/counties.py +++ b/src/dbcp/data_mart/counties.py @@ -137,7 +137,7 @@ def _get_env_justice_df(engine: sa.engine.Engine) -> pd.DataFrame: """Create county-level aggregates of Justice40 tracts.""" df = pd.read_sql_table("justice40_tracts", engine, schema="data_warehouse") df["county_id_fips"] = df["tract_id_fips"].str.slice(0, 5) - df.groupby("county_id_fips").agg( + df = df.groupby("county_id_fips").agg( total_tracts=("tract_id_fips", "count"), n_distinct_qualifying_tracts=("is_disadvantaged", "sum"), n_tracts_agriculture_loss_low_income=( @@ -196,7 +196,7 @@ def _get_env_justice_df(engine: sa.engine.Engine) -> pd.DataFrame: "unemployment_and_low_hs_edu_islands", "sum", ), - n_tracts_local_to_area_income_ratio_less_than_high_school_islands=( + n_tracts_local_to_area_income_ratio_less_than_high_school_islan=( "low_median_household_income_and_low_hs_edu_islands", "sum", ), From be2ae95b8a32deb4fba0895df8bb381b25dce87a Mon Sep 17 00:00:00 2001 From: bendnorman Date: Sun, 10 Nov 2024 23:30:33 -0800 Subject: [PATCH 09/25] Remove PUDL dependency --- requirements.txt | 2 +- src/dbcp/constants.py | 100 +++++++++++++++++++- src/dbcp/helpers.py | 52 ++++++++++ src/dbcp/transform/ballot_ready.py | 3 +- src/dbcp/transform/helpers.py | 7 +- src/dbcp/transform/lbnl_iso_queue.py | 5 +- src/dbcp/transform/local_opposition.py | 7 +- src/dbcp/transform/ncsl_state_permitting.py | 5 +- src/dbcp/transform/pudl_data.py | 7 +- 9 files changed, 172 insertions(+), 16 deletions(-) diff --git a/requirements.txt b/requirements.txt index ce107052..6d2af04d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -catalystcoop.pudl==2022.11.30 +addfips>=0.4 fiona~=1.9.4 psycopg2~=2.9.3 pytest~=6.2.5 diff --git a/src/dbcp/constants.py b/src/dbcp/constants.py index e81978d8..3e3af4e6 100644 --- a/src/dbcp/constants.py +++ b/src/dbcp/constants.py @@ -1,7 +1,105 @@ """DBCP constants.""" + +from io import StringIO from pathlib import Path 
-from pudl.metadata.enums import POLITICAL_SUBDIVISIONS +import pandas as pd + +POLITICAL_SUBDIVISIONS: pd.DataFrame = pd.read_csv( + StringIO( + """ +subdivision_code,subdivision_name,country_code,country_name,subdivision_type,timezone_approx,state_id_fips,division_name_us_census,division_code_us_census,region_name_us_census,is_epacems_state +AB,Alberta,CAN,Canada,province,America/Edmonton,,,,,0 +AK,Alaska,USA,United States of America,state,America/Anchorage,"02",Pacific Noncontiguous,PCN,West,1 +AL,Alabama,USA,United States of America,state,America/Chicago,"01",East South Central,ESC,South,1 +AR,Arkansas,USA,United States of America,state,America/Chicago,"05",West South Central,WSC,South,1 +AS,American Samoa,USA,United States of America,outlying_area,Pacific/Pago_Pago,"60",,,,0 +AZ,Arizona,USA,United States of America,state,America/Phoenix,"04",Mountain,MTN,West,1 +BC,British Columbia,CAN,Canada,province,America/Vancouver,,,,,0 +CA,California,USA,United States of America,state,America/Los_Angeles,"06",Pacific Contiguous,PCC,West,1 +CO,Colorado,USA,United States of America,state,America/Denver,"08",Mountain,MTN,West,1 +CT,Connecticut,USA,United States of America,state,America/New_York,"09",New England,NEW,Northeast,1 +DC,District of Columbia,USA,United States of America,district,America/New_York,"11",South Atlantic,SAT,South,1 +DE,Delaware,USA,United States of America,state,America/New_York,"10",South Atlantic,SAT,South,1 +FL,Florida,USA,United States of America,state,America/New_York,"12",South Atlantic,SAT,South,1 +GA,Georgia,USA,United States of America,state,America/New_York,"13",South Atlantic,SAT,South,1 +GU,Guam,USA,United States of America,outlying_area,Pacific/Guam,"66",,,,0 +HI,Hawaii,USA,United States of America,state,Pacific/Honolulu,"15",Pacific Noncontiguous,PCN,West,1 +IA,Iowa,USA,United States of America,state,America/Chicago,"19",West North Central,WNC,Midwest,1 +ID,Idaho,USA,United States of America,state,America/Denver,"16",Mountain,MTN,West,1 
+IL,Illinois,USA,United States of America,state,America/Chicago,"17",East North Central,ENC,Midwest,1 +IN,Indiana,USA,United States of America,state,America/New_York,"18",East North Central,ENC,Midwest,1 +KS,Kansas,USA,United States of America,state,America/Chicago,"20",West North Central,WNC,Midwest,1 +KY,Kentucky,USA,United States of America,state,America/New_York,"21",East South Central,ESC,South,1 +LA,Louisiana,USA,United States of America,state,America/Chicago,"22",West South Central,WSC,South,1 +MA,Massachusetts,USA,United States of America,state,America/New_York,"25",New England,NEW,Northeast,1 +MB,Manitoba,CAN,Canada,province,America/Winnipeg,,,,,0 +MD,Maryland,USA,United States of America,state,America/New_York,"24",South Atlantic,SAT,South,1 +ME,Maine,USA,United States of America,state,America/New_York,"23",New England,NEW,Northeast,1 +MI,Michigan,USA,United States of America,state,America/Detroit,"26",East North Central,ENC,Midwest,1 +MN,Minnesota,USA,United States of America,state,America/Chicago,"27",West North Central,WNC,Midwest,1 +MO,Missouri,USA,United States of America,state,America/Chicago,"29",West North Central,WNC,Midwest,1 +MP,Northern Mariana Islands,USA,United States of America,outlying_area,Pacific/Guam,"69",,,,0 +MS,Mississippi,USA,United States of America,state,America/Chicago,"28",East South Central,ESC,South,1 +MT,Montana,USA,United States of America,state,America/Denver,"30",Mountain,MTN,West,1 +NB,New Brunswick,CAN,Canada,province,America/Moncton,,,,,0 +NC,North Carolina,USA,United States of America,state,America/New_York,"37",South Atlantic,SAT,South,1 +ND,North Dakota,USA,United States of America,state,America/Chicago,"38",West North Central,WNC,Midwest,1 +NE,Nebraska,USA,United States of America,state,America/Chicago,"31",West North Central,WNC,Midwest,1 +NH,New Hampshire,USA,United States of America,state,America/New_York,"33",New England,NEW,Northeast,1 +NJ,New Jersey,USA,United States of 
America,state,America/New_York,"34",Middle Atlantic,MAT,Northeast,1 +NL,Newfoundland and Labrador,CAN,Canada,province,America/St_Johns,,,,,0 +NM,New Mexico,USA,United States of America,state,America/Denver,"35",Mountain,MTN,West,1 +NS,Nova Scotia,CAN,Canada,province,America/Halifax,,,,,0 +NT,Northwest Territories,CAN,Canada,territory,America/Yellowknife,,,,,0 +NU,Nunavut,CAN,Canada,territory,America/Iqaluit,,,,,0 +NV,Nevada,USA,United States of America,state,America/Los_Angeles,"32",Mountain,MTN,West,1 +NY,New York,USA,United States of America,state,America/New_York,"36",Middle Atlantic,MAT,Northeast,1 +OH,Ohio,USA,United States of America,state,America/New_York,"39",East North Central,ENC,Midwest,1 +OK,Oklahoma,USA,United States of America,state,America/Chicago,"40",West South Central,WSC,South,1 +ON,Ontario,CAN,Canada,province,America/Toronto,,,,,0 +OR,Oregon,USA,United States of America,state,America/Los_Angeles,"41",Pacific Contiguous,PCC,West,1 +PA,Pennsylvania,USA,United States of America,state,America/New_York,"42",Middle Atlantic,MAT,Northeast,1 +PE,Prince Edwards Island,CAN,Canada,province,America/Halifax,,,,,0 +PR,Puerto Rico,USA,United States of America,outlying_area,America/Puerto_Rico,"72",,,,1 +QC,Quebec,CAN,Canada,province,America/Montreal,,,,,0 +RI,Rhode Island,USA,United States of America,state,America/New_York,"44",New England,NEW,Northeast,1 +SC,South Carolina,USA,United States of America,state,America/New_York,"45",South Atlantic,SAT,South,1 +SD,South Dakota,USA,United States of America,state,America/Chicago,"46",West North Central,WNC,Midwest,1 +SK,Saskatchewan,CAN,Canada,province,America/Regina,,,,,0 +TN,Tennessee,USA,United States of America,state,America/Chicago,"47",East South Central,ESC,South,1 +TX,Texas,USA,United States of America,state,America/Chicago,"48",West South Central,WSC,South,1 +UT,Utah,USA,United States of America,state,America/Denver,"49",Mountain,MTN,West,1 +VA,Virginia,USA,United States of 
America,state,America/New_York,"51",South Atlantic,SAT,South,1 +VI,Virgin Islands,USA,United States of America,outlying_area,America/Port_of_Spain,"78",,,,0 +VT,Vermont,USA,United States of America,state,America/New_York,"50",New England,NEW,Northeast,1 +WA,Washington,USA,United States of America,state,America/Los_Angeles,"53",Pacific Contiguous,PCC,West,1 +WI,Wisconsin,USA,United States of America,state,America/Chicago,"55",East North Central,ENC,Midwest,1 +WV,West Virginia,USA,United States of America,state,America/New_York,"54",South Atlantic,SAT,South,1 +WY,Wyoming,USA,United States of America,state,America/Denver,"56",Mountain,MTN,West,1 +YT,Yukon Territory,CAN,Canada,territory,America/Whitehorse,,,,,0 + """ + ), + dtype={ + "subdivision_code": "string", + "subdivision_name": "string", + "country_code": "string", + "country_name": "string", + "subdivision_type": "string", + "timezone": "string", + "state_id_fips": "string", + "division_name_us_census": "string", + "division_code_us_census": "string", + "region_name_us_census": "string", + "is_epacems_state": bool, + }, +) +"""Static attributes of sub-national political jurisdictions. 
+ +Note: AK and PR have incomplete EPA CEMS data, yet the table above still flags both +with is_epacems_state = 1 (TODO: confirm which is intended). See +https://github.com/catalyst-cooperative/pudl/issues/1264 +""" FIPS_CODE_VINTAGE = 2020 diff --git a/src/dbcp/helpers.py b/src/dbcp/helpers.py index f1fdfafc..bf19053e 100644 --- a/src/dbcp/helpers.py +++ b/src/dbcp/helpers.py @@ -6,6 +6,7 @@ from io import StringIO from pathlib import Path +import addfips import fsspec import google.auth import pandas as pd @@ -255,3 +256,54 @@ def trim_columns_length(df: pd.DataFrame, length_limit: int = 63) -> pd.DataFram """Trim column length of a pandas dataframe to satisfy postgres column length limit.""" df.columns = [col[:length_limit] for col in df.columns] return df + + +def add_fips_ids( + df: pd.DataFrame, + state_col: str = "state", + county_col: str = "county", + vintage: int = 2015, +) -> pd.DataFrame: + """Add State and County FIPS IDs to a dataframe. + + To just add State FIPS IDs, make county_col = None. + """ + # force the columns to be the nullable string types so we have a consistent + # null value to filter out before feeding to addfips + df = df.astype({state_col: pd.StringDtype()}) + if county_col: + df = df.astype({county_col: pd.StringDtype()}) + af = addfips.AddFIPS(vintage=vintage) + # Lookup the state and county FIPS IDs and add them to the dataframe: + df["state_id_fips"] = df.apply( + lambda x: ( + af.get_state_fips(state=x[state_col]) if pd.notnull(x[state_col]) else pd.NA + ), + axis=1, + ) + + # force the code columns to be nullable strings - the leading zeros are + # important + df = df.astype({"state_id_fips": pd.StringDtype()}) + + logger.info( + f"Assigned state FIPS codes for " + f"{len(df[df.state_id_fips.notnull()])/len(df):.2%} of records." 
+ ) + if county_col: + df["county_id_fips"] = df.apply( + lambda x: ( + af.get_county_fips(state=x[state_col], county=x[county_col]) + if pd.notnull(x[county_col]) and pd.notnull(x[state_col]) + else pd.NA + ), + axis=1, + ) + # force the code columns to be nullable strings - the leading zeros are + # important + df = df.astype({"county_id_fips": pd.StringDtype()}) + logger.info( + f"Assigned county FIPS codes for " + f"{len(df[df.county_id_fips.notnull()])/len(df):.2%} of records." + ) + return df diff --git a/src/dbcp/transform/ballot_ready.py b/src/dbcp/transform/ballot_ready.py index a774b3f0..8ee3ac13 100644 --- a/src/dbcp/transform/ballot_ready.py +++ b/src/dbcp/transform/ballot_ready.py @@ -1,9 +1,10 @@ """Module for cleaning Ballot Ready data.""" + import logging import pandas as pd -from pudl.helpers import add_fips_ids +from dbcp.helpers import add_fips_ids DATETIME_COLUMNS = ["race_created_at", "race_updated_at", "election_day"] diff --git a/src/dbcp/transform/helpers.py b/src/dbcp/transform/helpers.py index 26c53b38..d9c7bdb9 100644 --- a/src/dbcp/transform/helpers.py +++ b/src/dbcp/transform/helpers.py @@ -1,4 +1,5 @@ """Common transform operations.""" + from pathlib import Path from typing import Any, Dict, List, Optional, Sequence @@ -6,8 +7,8 @@ from joblib import Memory from dbcp.constants import FIPS_CODE_VINTAGE +from dbcp.helpers import add_fips_ids from dbcp.transform.geocoding import GoogleGeocoder -from pudl.helpers import add_fips_ids as _add_fips_ids UNIX_EPOCH_ORIGIN = pd.Timestamp("01/01/1970") # Excel parser is simplified and will be one day off for dates < 1900/03/01 @@ -319,7 +320,7 @@ def add_county_fips_with_backup_geocoding( ) # copy # first try a simple FIPS lookup and split by valid/invalid fips codes # The only purpose of this step is to save API calls on the easy ones (most of them) - with_fips = _add_fips_ids( + with_fips = add_fips_ids( filled_state_locality, state_col=state_col, county_col=locality_col, @@ -350,7 +351,7 @@ 
def add_county_fips_with_backup_geocoding( ) nan_fips = pd.concat([nan_fips, geocoded], axis=1) # add fips using geocoded names - filled_fips = _add_fips_ids( + filled_fips = add_fips_ids( nan_fips, state_col=state_col, county_col="geocoded_containing_county", diff --git a/src/dbcp/transform/lbnl_iso_queue.py b/src/dbcp/transform/lbnl_iso_queue.py index b694b168..15704a4c 100644 --- a/src/dbcp/transform/lbnl_iso_queue.py +++ b/src/dbcp/transform/lbnl_iso_queue.py @@ -1,16 +1,17 @@ """Functions to transform LBNL ISO queue tables.""" + import logging from typing import Callable, Dict, List, Sequence import numpy as np import pandas as pd +from dbcp.helpers import add_fips_ids from dbcp.transform.helpers import ( add_county_fips_with_backup_geocoding, normalize_multicolumns_to_rows, parse_dates, ) -from pudl.helpers import add_fips_ids as _add_fips_ids logger = logging.getLogger(__name__) @@ -526,7 +527,7 @@ def _fix_independent_city_fips(location_df: pd.DataFrame) -> pd.DataFrame: .str.lower() .str.replace("^city of (.+)", lambda x: x.group(1) + " city", regex=True) ) - nan_fips = _add_fips_ids( + nan_fips = add_fips_ids( nan_fips, state_col="raw_state_name", county_col="raw_county_name" ) diff --git a/src/dbcp/transform/local_opposition.py b/src/dbcp/transform/local_opposition.py index a8f982f6..7092b253 100644 --- a/src/dbcp/transform/local_opposition.py +++ b/src/dbcp/transform/local_opposition.py @@ -1,11 +1,12 @@ """Transform functions for local opposition data.""" + from typing import Dict import pandas as pd from dbcp.constants import FIPS_CODE_VINTAGE +from dbcp.helpers import add_fips_ids from dbcp.transform.helpers import add_county_fips_with_backup_geocoding -from pudl.helpers import add_fips_ids as _add_fips_ids def _extract_years(ser: pd.Series) -> pd.Series: @@ -51,7 +52,7 @@ def _transform_state_policy(state_policy_df: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: dataframe of state policies with additional columns """ - state = _add_fips_ids( 
+ state = add_fips_ids( state_policy_df, county_col="policy", vintage=FIPS_CODE_VINTAGE ).drop(columns="county_id_fips") year_summaries = _extract_years(state.loc[:, "policy"]) @@ -128,7 +129,7 @@ def _transform_contested_projects(project_df: pd.DataFrame) -> pd.DataFrame: pd.DataFrame: dataframe of contested projects with additional columns """ # this should really use geocoding, but we don't use this data so I didn't bother. - proj = _add_fips_ids(project_df, county_col="description").drop( + proj = add_fips_ids(project_df, county_col="description").drop( columns="county_id_fips" ) year_summaries = _extract_years(proj.loc[:, "description"]) diff --git a/src/dbcp/transform/ncsl_state_permitting.py b/src/dbcp/transform/ncsl_state_permitting.py index cf294ba4..d8404209 100644 --- a/src/dbcp/transform/ncsl_state_permitting.py +++ b/src/dbcp/transform/ncsl_state_permitting.py @@ -1,11 +1,12 @@ """Tranform functions for NCSL state permitting.""" + from typing import Dict import numpy as np import pandas as pd from dbcp.constants import US_STATES_TERRITORIES -from pudl.helpers import add_fips_ids as _add_fips_ids +from dbcp.helpers import add_fips_ids def transform(raw_df: Dict[str, pd.DataFrame]) -> Dict[str, pd.DataFrame]: @@ -38,7 +39,7 @@ def transform(raw_df: Dict[str, pd.DataFrame]) -> Dict[str, pd.DataFrame]: "link": pd.StringDtype(), } transform_df = transform_df.astype(dtypes, copy=False) - transform_df = _add_fips_ids(transform_df, county_col="description").drop( + transform_df = add_fips_ids(transform_df, county_col="description").drop( columns="county_id_fips" ) transform_df.rename(columns={"state": "raw_state_name"}, inplace=True) diff --git a/src/dbcp/transform/pudl_data.py b/src/dbcp/transform/pudl_data.py index d01e4cfa..3180eeba 100644 --- a/src/dbcp/transform/pudl_data.py +++ b/src/dbcp/transform/pudl_data.py @@ -1,9 +1,10 @@ """Transform PUDL tables.""" + import pandas as pd from dbcp.constants import FIPS_CODE_VINTAGE +from dbcp.helpers import 
add_fips_ids from dbcp.transform.helpers import bedford_addfips_fix -from pudl.helpers import add_fips_ids as _add_fips_ids def _transform_pudl_generators(pudl_generators) -> pd.DataFrame: @@ -22,7 +23,7 @@ def _transform_pudl_generators(pudl_generators) -> pd.DataFrame: filled_location = pudl_generators.loc[:, ["state", "county"]].fillna( "" ) # copy; don't want to fill actual table - fips = _add_fips_ids(filled_location, vintage=FIPS_CODE_VINTAGE) + fips = add_fips_ids(filled_location, vintage=FIPS_CODE_VINTAGE) pudl_generators = pd.concat( [pudl_generators, fips[["state_id_fips", "county_id_fips"]]], axis=1, copy=False ) @@ -55,7 +56,7 @@ def _transform_pudl_eia860m_changelog( "" ) # copy; don't want to fill actual table - fips = _add_fips_ids(filled_location, vintage=FIPS_CODE_VINTAGE) + fips = add_fips_ids(filled_location, vintage=FIPS_CODE_VINTAGE) pudl_eia860m_changelog = pd.concat( [pudl_eia860m_changelog, fips[["state_id_fips", "county_id_fips"]]], axis=1, From ac0c2708ea66bb9c316b1d434cb8f8bd87a0d0f6 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Mon, 11 Nov 2024 00:06:57 -0800 Subject: [PATCH 10/25] Move some long inline sql queries to sql files --- src/dbcp/data_mart/co2_dashboard.py | 106 +------ src/dbcp/data_mart/counties.py | 112 +------- .../fossil_infrastructure_projects.py | 187 +------------ src/dbcp/data_mart/helpers.py | 8 + src/dbcp/data_mart/projects.py | 263 +----------------- .../sql_queries/get_eia860m_current.sql | 71 +++++ .../get_energy_community_qualification.sql | 15 + .../get_existing_plant_attributes.sql | 44 +++ .../sql_queries/get_gridstatus_projects.sql | 63 +++++ .../sql_queries/get_lbnl_projects.sql | 65 +++++ .../get_offshore_wind_extra_cols.sql | 42 +++ .../sql_queries/get_proposed_fossil_infra.sql | 59 ++++ .../get_proposed_fossil_plants.sql | 38 +++ .../get_proposed_infra_projects.sql | 182 ++++++++++++ .../get_proprietary_proposed_offshore.sql | 51 ++++ 15 files changed, 654 insertions(+), 652 deletions(-) create mode 
100644 src/dbcp/data_mart/sql_queries/get_eia860m_current.sql create mode 100644 src/dbcp/data_mart/sql_queries/get_energy_community_qualification.sql create mode 100644 src/dbcp/data_mart/sql_queries/get_existing_plant_attributes.sql create mode 100644 src/dbcp/data_mart/sql_queries/get_gridstatus_projects.sql create mode 100644 src/dbcp/data_mart/sql_queries/get_lbnl_projects.sql create mode 100644 src/dbcp/data_mart/sql_queries/get_offshore_wind_extra_cols.sql create mode 100644 src/dbcp/data_mart/sql_queries/get_proposed_fossil_infra.sql create mode 100644 src/dbcp/data_mart/sql_queries/get_proposed_fossil_plants.sql create mode 100644 src/dbcp/data_mart/sql_queries/get_proposed_infra_projects.sql create mode 100644 src/dbcp/data_mart/sql_queries/get_proprietary_proposed_offshore.sql diff --git a/src/dbcp/data_mart/co2_dashboard.py b/src/dbcp/data_mart/co2_dashboard.py index 6c4db4f1..cf31874b 100644 --- a/src/dbcp/data_mart/co2_dashboard.py +++ b/src/dbcp/data_mart/co2_dashboard.py @@ -3,13 +3,14 @@ The table is at the county level and contains data from. 
""" + from typing import Optional import pandas as pd import sqlalchemy as sa from dbcp.constants import PUDL_LATEST_YEAR -from dbcp.data_mart.helpers import _get_county_fips_df, _get_state_fips_df +from dbcp.data_mart.helpers import _get_county_fips_df, _get_state_fips_df, get_query from dbcp.helpers import get_pudl_resource, get_sql_engine from dbcp.transform.helpers import ( add_county_fips_with_backup_geocoding, @@ -234,46 +235,7 @@ def _get_existing_fossil_plants( def _get_proposed_fossil_plants(engine: sa.engine.Engine) -> pd.DataFrame: # see last SELECT statement for output columns - query = """ - WITH - active_loc as ( - select - proj.project_id, - loc.county_id_fips - from data_warehouse.iso_projects as proj - left join data_warehouse.iso_locations as loc - on loc.project_id = proj.project_id - where proj.queue_status = 'active' - ), - projects as ( - select - loc.project_id, - loc.county_id_fips, - res.capacity_mw, - res.resource_clean as resource - from active_loc as loc - left join data_warehouse.iso_resource_capacity as res - on res.project_id = loc.project_id - where res.capacity_mw is not NULL - and res.resource_clean in ('Natural Gas', 'Coal', 'Oil') - ), - w_county_names as ( - select - cfip.county_name as county, - cfip.state_id_fips, - proj.* - from projects as proj - left join data_warehouse.county_fips as cfip - on proj.county_id_fips = cfip.county_id_fips - ) - SELECT - sfip.state_name as state, - proj.* - from w_county_names as proj - left join data_warehouse.state_fips as sfip - on proj.state_id_fips = sfip.state_id_fips - ; - """ + query = get_query("get_proposed_fossil_plants.sql") df = pd.read_sql(query, engine) _estimate_proposed_power_co2e(df) df.rename(columns={"project_id": "id"}, inplace=True) @@ -358,67 +320,7 @@ def _estimate_proposed_power_co2e( def _get_proposed_fossil_infra(engine: sa.engine.Engine) -> pd.DataFrame: - query = """ - WITH - projects as ( - SELECT - project_id, - -- First multiplier below is unit conversion - -- 
The second is 15 percent haircut to account for realistic utilization, as per design doc. - greenhouse_gases_co2e_tpy * 0.907185 * 0.85 as co2e_tonnes_per_year - FROM data_warehouse.eip_projects - WHERE operating_status not in ('Operating', 'Under construction', 'Canceled') - ), - facilities as ( - SELECT - facility_id, - county_id_fips, - state_id_fips - FROM data_warehouse.eip_facilities - ), - association as ( - -- this query simplifies the m:m relationship - -- by taking only the first result, making it m:1. - -- Only 5 rows are dropped. - select DISTINCT ON (project_id) - project_id, - facility_id - from data_warehouse.eip_facility_project_association - order by 1, 2 DESC - ), - proj_agg_to_facility as ( - SELECT - ass.facility_id, - sum(co2e_tonnes_per_year) as co2e_tonnes_per_year - FROM projects as proj - LEFT JOIN association as ass - ON proj.project_id = ass.project_id - GROUP BY 1 - ), - facility_aggs as ( - SELECT - fac.*, - proj.co2e_tonnes_per_year - FROM proj_agg_to_facility as proj - LEFT JOIN facilities as fac - ON proj.facility_id = fac.facility_id - ), - w_county_names as ( - select - cfip.county_name as county, - proj.* - from facility_aggs as proj - left join data_warehouse.county_fips as cfip - on proj.county_id_fips = cfip.county_id_fips - ) - SELECT - sfip.state_name as state, - proj.* - from w_county_names as proj - left join data_warehouse.state_fips as sfip - on proj.state_id_fips = sfip.state_id_fips - ; - """ + query = get_query("get_proposed_fossil_infra.sql") df = pd.read_sql(query, engine) df["facility_type"] = "proposed_infrastructure" df.rename(columns={"facility_id": "id"}, inplace=True) diff --git a/src/dbcp/data_mart/counties.py b/src/dbcp/data_mart/counties.py index 3890cad4..70af57a6 100644 --- a/src/dbcp/data_mart/counties.py +++ b/src/dbcp/data_mart/counties.py @@ -41,6 +41,7 @@ _get_county_fips_df, _get_state_fips_df, _subset_db_columns, + get_query, ) from dbcp.data_mart.projects import create_long_format as 
create_iso_data_mart from dbcp.helpers import get_sql_engine @@ -230,52 +231,7 @@ def _get_existing_plant_attributes(engine: sa.engine.Engine) -> pd.DataFrame: # SELECT max(fuel_type_count) as max_fuel_type_count # FROM gen_fuels - query = """ - WITH - plant_fuel_aggs as ( - SELECT - plant_id_eia, - (CASE - WHEN technology_description = 'Batteries' THEN 'Battery Storage' - WHEN technology_description = 'Offshore Wind Turbine' THEN 'Offshore Wind' - WHEN fuel_type_code_pudl = 'waste' THEN 'other' - ELSE fuel_type_code_pudl - END - ) as resource, - sum(net_generation_mwh) as net_gen_by_fuel, - sum(capacity_mw) as capacity_by_fuel, - max(generator_operating_date) as max_operating_date - from data_warehouse.pudl_generators - where operational_status = 'existing' - group by 1, 2 - ), - plant_capacity as ( - SELECT - plant_id_eia, - sum(capacity_by_fuel) as capacity_mw - from plant_fuel_aggs - group by 1 - ), - all_aggs as ( - SELECT - * - from plant_fuel_aggs as pfuel - LEFT JOIN plant_capacity as pcap - USING (plant_id_eia) - ) - -- select fuel type with the largest generation (with capacity as tiebreaker) - -- https://stackoverflow.com/questions/3800551/select-first-row-in-each-group-by-group/7630564 - -- NOTE: this is not appropriate for fields that require aggregation, hence CTEs above - SELECT DISTINCT ON (plant_id_eia) - plant_id_eia, - resource, - -- net_gen_by_fuel for debugging - max_operating_date, - capacity_mw - from all_aggs - ORDER BY plant_id_eia, net_gen_by_fuel DESC NULLS LAST, capacity_by_fuel DESC NULLS LAST - ; - """ + query = get_query("get_existing_plant_attributes.sql") df = pd.read_sql(query, engine) resource_map = { "gas": "Natural Gas", @@ -668,50 +624,7 @@ def _get_offshore_wind_extra_cols(engine: sa.engine.Engine) -> pd.DataFrame: # they intentionally double-count capacity. The theory is that the loss # of any port could block the whole associated project, so we want # to know how much total capacity is at stake in each port county. 
- query = """ - WITH - proj_ports AS ( - SELECT - proj."project_id", - port.location_id, - locs.county_id_fips, - proj."capacity_mw" - FROM "data_warehouse"."offshore_wind_projects" as proj - INNER JOIN data_warehouse.offshore_wind_port_association as port - USING(project_id) - INNER JOIN data_warehouse.offshore_wind_locations as locs - USING(location_id) - ), - -- select * from proj_ports - -- order by project_id, location_id - port_aggs AS ( - SELECT - county_id_fips, - -- intentional double-counting here. The theory is that the loss - -- of any port could block the whole associated project, so we want - -- to know how much total capacity is at stake in each port county. - SUM(capacity_mw) as offshore_wind_capacity_mw_via_ports - FROM proj_ports - GROUP BY 1 - order by 1 - ), - interest AS ( - SELECT - county_id_fips, - string_agg(distinct(why_of_interest), ',' order by why_of_interest) as offshore_wind_interest_type - FROM data_warehouse.offshore_wind_locations as locs - GROUP BY 1 - ORDER BY 1 - ) - SELECT - county_id_fips, - offshore_wind_capacity_mw_via_ports, - offshore_wind_interest_type - FROM interest - LEFT JOIN port_aggs - USING(county_id_fips) - where county_id_fips is not NULL; - """ + query = get_query("get_offshore_wind_extra_cols.sql") df = pd.read_sql(query, engine) df.set_index("county_id_fips", inplace=True) return df @@ -779,25 +692,8 @@ def _get_federal_land_fraction(postgres_engine: sa.engine.Engine): def _get_energy_community_qualification(postgres_engine: sa.engine.Engine): # NOTE: this query contains hardcoded parameters for the # energy communities qualification criteria - query = """ - WITH - ec as ( - SELECT - ec.county_id_fips, - coal_qualifying_area_fraction as energy_community_coal_closures_area_fraction, - qualifies_by_employment_criteria as energy_community_qualifies_via_employment - FROM data_warehouse.energy_communities_by_county AS ec - LEFT JOIN data_warehouse.county_fips AS fips - USING (county_id_fips) - ) - SELECT - *, - 
(energy_community_coal_closures_area_fraction > 0.5 OR - energy_community_qualifies_via_employment) as energy_community_qualifies - FROM ec - """ + query = get_query("get_energy_community_qualification.sql") ec = pd.read_sql(query, postgres_engine) - return ec diff --git a/src/dbcp/data_mart/fossil_infrastructure_projects.py b/src/dbcp/data_mart/fossil_infrastructure_projects.py index 8adbd4b4..d0096741 100644 --- a/src/dbcp/data_mart/fossil_infrastructure_projects.py +++ b/src/dbcp/data_mart/fossil_infrastructure_projects.py @@ -1,197 +1,16 @@ """Module to create a table of EIP fossil infrastructure projects for use in spreadsheet tools.""" + from typing import Optional import pandas as pd import sqlalchemy as sa +from dbcp.data_mart.helpers import get_query from dbcp.helpers import get_sql_engine def _get_proposed_infra_projects(engine: sa.engine.Engine) -> pd.DataFrame: - query = """ - WITH - projects as ( - SELECT - project_id, - name as project_name, - classification as project_classification, - cost_millions, - date_modified, - industry_sector, - project_description, - raw_project_type, - raw_number_of_jobs_promised, - -- First multiplier below is unit conversion - -- The second is 15 percent haircut to account for realistic utilization, as per design doc. 
- greenhouse_gases_co2e_tpy * 0.907185 * 0.85 as co2e_tonnes_per_year, - volatile_organic_compounds_voc_tpy * 0.907185 * 0.85 as voc_tonnes_per_year, - sulfur_dioxide_so2_tpy * 0.907185 * 0.85 as so2_tonnes_per_year, - nitrogen_oxides_nox_tpy * 0.907185 * 0.85 as nox_tonnes_per_year, - carbon_monoxide_co_tpy * 0.907185 * 0.85 as co_tonnes_per_year, - particulate_matter_pm2_5_tpy * 0.907185 * 0.85 as pm2_5_tonnes_per_year, - total_wetlands_affected_permanently_acres, - total_wetlands_affected_temporarily_acres, - is_ally_target, - operating_status - FROM data_warehouse.eip_projects - WHERE operating_status not in ('Operating', 'Under construction', 'Canceled') - ), - facilities as ( - SELECT - facility_id, - county_id_fips, - state_id_fips, - name as facility_name, - latitude, - longitude, - -- concat with separator - concat_ws(', ', raw_street_address, raw_city, raw_zip_code) as raw_street_address, - facility_description, - raw_estimated_population_within_3_miles, - raw_percent_low_income_within_3_miles, - raw_percent_people_of_color_within_3_miles, - raw_respiratory_hazard_index as raw_respiratory_hazard_index_within_3_miles, - raw_air_toxics_cancer_risk_nata_cancer_risk as raw_relative_cancer_risk_per_million_within_3_miles, - raw_wastewater_discharge_indicator - FROM data_warehouse.eip_facilities - ), - permits as ( - SELECT - air_construction_id, - description_or_purpose as permit_description - -- The following would be good additions BUT I'd need to - -- fix the 1:m project:permit association first. - -- Need to take only the most recent permit. - -- This is perfectly doable but deferring for time. - --permit_status, - --raw_deadline_to_begin_construction, - --raw_last_day_to_comment - from data_warehouse.eip_air_constr_permits - ), - proj_fac_association as ( - -- this query simplifies the m:m relationship - -- by taking only the first result, making it m:1. - -- Only 6 / 681 rows are dropped. 
- select DISTINCT ON (project_id) - project_id, - facility_id - from data_warehouse.eip_facility_project_association - ), - proj_permit_association as ( - -- this query simplifies the m:m relationship - -- by taking only the first result, making it m:1. - -- 276 / 831 rows are dropped (lots of permits). - -- This method should be replaced by most recent permit. - select DISTINCT ON (project_id) - project_id, - air_construction_id - from data_warehouse.eip_project_permit_association - ), - proj_facility_id as ( - SELECT - proj.*, - ass.facility_id - FROM projects as proj - LEFT JOIN proj_fac_association as ass - ON proj.project_id = ass.project_id - ), - proj_facility as ( - SELECT - proj.*, - -- everything except fac.facility_id (duplicated with proj.facility_id) - fac.county_id_fips, - fac.state_id_fips, - fac.facility_name, - fac.latitude, - fac.longitude, - fac.raw_street_address, - fac.facility_description, - fac.raw_estimated_population_within_3_miles, - fac.raw_percent_low_income_within_3_miles, - fac.raw_percent_people_of_color_within_3_miles, - fac.raw_respiratory_hazard_index_within_3_miles, - fac.raw_relative_cancer_risk_per_million_within_3_miles, - fac.raw_wastewater_discharge_indicator - FROM proj_facility_id as proj - LEFT JOIN facilities as fac - ON proj.facility_id = fac.facility_id - ), - proj_fac_permit_id as ( - SELECT - proj.*, - ass.air_construction_id - FROM proj_facility as proj - LEFT JOIN proj_permit_association as ass - ON proj.project_id = ass.project_id - ), - proj_fac_perm as ( - SELECT - proj.*, - -- everything except perm.air_construction_id (duplicated with proj.air_construction_id) - perm.permit_description - FROM proj_fac_permit_id as proj - LEFT JOIN permits as perm - ON proj.air_construction_id = perm.air_construction_id - ), - w_county_names as ( - select - cfip.county_name as county, - proj.* - from proj_fac_perm as proj - left join data_warehouse.county_fips as cfip - on proj.county_id_fips = cfip.county_id_fips - ), - final 
as( - SELECT - sfip.state_name as state, - proj.* - from w_county_names as proj - left join data_warehouse.state_fips as sfip - on proj.state_id_fips = sfip.state_id_fips - ) - SELECT - -- reorder column names to be more friendly - project_id, - project_name, - state, - county, - county_id_fips, - state_id_fips, - latitude, - longitude, - raw_street_address, - air_construction_id, - facility_id, - facility_name, - project_classification, - operating_status, - industry_sector, - raw_project_type, - project_description, - facility_description, - permit_description, - cost_millions, - raw_number_of_jobs_promised, - date_modified, - co2e_tonnes_per_year, - voc_tonnes_per_year, - so2_tonnes_per_year, - nox_tonnes_per_year, - co_tonnes_per_year, - pm2_5_tonnes_per_year, - total_wetlands_affected_permanently_acres, - total_wetlands_affected_temporarily_acres, - raw_estimated_population_within_3_miles, - raw_percent_low_income_within_3_miles, - raw_percent_people_of_color_within_3_miles, - raw_respiratory_hazard_index_within_3_miles, - raw_relative_cancer_risk_per_million_within_3_miles, - raw_wastewater_discharge_indicator, - is_ally_target - FROM final - ORDER BY 2, 1 - ; - """ + query = get_query("get_proposed_infra_projects.sql") df = pd.read_sql(query, engine) # fix columns with mixed dtypes that break pyarrow and parquet (via pandas_gbq) df.loc[:, "is_ally_target"] = df.loc[:, "is_ally_target"].astype(str) diff --git a/src/dbcp/data_mart/helpers.py b/src/dbcp/data_mart/helpers.py index 7b3a553b..c4038d6d 100644 --- a/src/dbcp/data_mart/helpers.py +++ b/src/dbcp/data_mart/helpers.py @@ -1,5 +1,6 @@ """Module of helper functions for creating data mart tables from the data warehouse.""" +from pathlib import Path from typing import Optional, Sequence import pandas as pd @@ -342,3 +343,10 @@ def _estimate_proposed_power_co2e( ] iso_projects.drop(columns=intermediates, inplace=True) return + + +def get_query(filename: str) -> str: + """Get the query from a file.""" + 
sql_query_dir = Path(__file__).parent / "sql_queries" + full_path = sql_query_dir / filename + return full_path.read_text() diff --git a/src/dbcp/data_mart/projects.py b/src/dbcp/data_mart/projects.py index 4b55d028..7e08d59e 100644 --- a/src/dbcp/data_mart/projects.py +++ b/src/dbcp/data_mart/projects.py @@ -14,6 +14,7 @@ _estimate_proposed_power_co2e, _get_county_fips_df, _get_state_fips_df, + get_query, ) from dbcp.helpers import get_sql_engine @@ -25,71 +26,7 @@ def _get_gridstatus_projects(engine: sa.engine.Engine) -> pd.DataFrame: # drops transmission projects - query = """ - WITH - proj_res AS ( - SELECT - queue_id, - is_nearly_certain, - project_id, - project_name, - capacity_mw, - developer, - entity, - entity AS iso_region, -- these are different in non-ISO data from LBNL - utility, - proposed_completion_date AS date_proposed_online, - point_of_interconnection, - is_actionable, - resource_clean, - queue_status, - queue_date AS date_entered_queue, - actual_completion_date, - withdrawn_date, - interconnection_status_raw AS interconnection_status - FROM data_warehouse.gridstatus_projects as proj - LEFT JOIN data_warehouse.gridstatus_resource_capacity as res - USING (project_id) - WHERE resource_clean != 'Transmission' - ), - loc as ( - -- projects can have multiple locations, though 99 percent have only one. - -- Can multiply capacity by frac_locations_in_county to allocate it equally. - -- Note that there are some duplicates of (project_id, county_id_fips) as well. - -- This happens when the original data lists multiple city names that are in the - -- same county. This does not cause double counting because of frac_locations_in_county. 
- SELECT - project_id, - state_id_fips, - county_id_fips, - (1.0 / count(*) over (partition by project_id))::real as frac_locations_in_county - FROM data_warehouse.gridstatus_locations - ), - gs as ( - SELECT - proj_res.*, - loc.state_id_fips, - loc.county_id_fips, - -- projects with missing location info get full capacity allocation - coalesce(loc.frac_locations_in_county, 1.0) as frac_locations_in_county - FROM proj_res - LEFT JOIN loc - USING (project_id) - ) - SELECT - sfip.state_name AS state, - cfip.county_name AS county, - gs.*, - 'gridstatus' AS source, - ncsl.permitting_type AS state_permitting_type - FROM gs - LEFT JOIN data_warehouse.ncsl_state_permitting AS ncsl - on gs.state_id_fips = ncsl.state_id_fips - LEFT JOIN data_warehouse.state_fips AS sfip - ON gs.state_id_fips = sfip.state_id_fips - LEFT JOIN data_warehouse.county_fips AS cfip - ON gs.county_id_fips = cfip.county_id_fips - """ + query = get_query("get_gridstatus_projects.sql") gs = pd.read_sql(query, engine) gs = gs[gs.iso_region.str.upper().isin(GS_REGIONS)] return gs @@ -127,73 +64,7 @@ def _merge_lbnl_with_gridstatus(lbnl: pd.DataFrame, gs: pd.DataFrame) -> pd.Data def _get_lbnl_projects(engine: sa.engine.Engine, non_iso_only=True) -> pd.DataFrame: - query = """ - WITH - iso_proj_res as ( - SELECT - proj.project_id, - proj.queue_id, - proj.date_proposed as date_proposed_online, - proj.developer, - proj.entity, - proj.interconnection_status_lbnl as interconnection_status, - proj.point_of_interconnection, - proj.project_name, - proj.queue_date as date_entered_queue, - proj.queue_status, - proj.region as iso_region, - proj.utility, - proj.is_actionable, - proj.is_nearly_certain, - proj.actual_completion_date, - proj.withdrawn_date, - res.capacity_mw, - res.resource_clean - FROM data_warehouse.iso_projects as proj - INNER JOIN data_warehouse.iso_resource_capacity as res - ON proj.project_id = res.project_id - ), - loc as ( - -- Remember that projects can have multiple locations, though 99 
percent have only one. - -- Can optionally multiply capacity by frac_locations_in_county to allocate it equally. - -- Note that there are some duplicates of (project_id, county_id_fips) as well. - -- This happens when the original data lists multiple city names that are in the - -- same county. This does not cause double counting because of frac_locations_in_county. - SELECT - project_id, - state_id_fips, - county_id_fips, - raw_county_name, -- for validation only - (1.0 / count(*) over (partition by project_id))::real as frac_locations_in_county - FROM data_warehouse.iso_locations - ), - iso as ( - SELECT - iso_proj_res.*, - loc.state_id_fips, - loc.county_id_fips, - loc.raw_county_name, -- for validation only - -- projects with missing location info get full capacity allocation - coalesce(loc.frac_locations_in_county, 1.0) as frac_locations_in_county - from iso_proj_res - LEFT JOIN loc - ON iso_proj_res.project_id = loc.project_id - ) - SELECT - sfip.state_name as state, - cfip.county_name as county, - iso.*, - 'lbnl' as source, - ncsl.permitting_type as state_permitting_type - from iso - left join data_warehouse.state_fips as sfip - on iso.state_id_fips = sfip.state_id_fips - left join data_warehouse.county_fips as cfip - on iso.county_id_fips = cfip.county_id_fips - left join data_warehouse.ncsl_state_permitting as ncsl - on iso.state_id_fips = ncsl.state_id_fips - ; - """ + query = get_query("get_lbnl_projects.sql") df = pd.read_sql(query, engine) if non_iso_only: df = df[~df.iso_region.isin(GS_REGIONS)] @@ -244,59 +115,7 @@ def _get_proprietary_proposed_offshore(engine: sa.engine.Engine) -> pd.DataFrame column to allocate capacity and co2e estimates to counties when aggregating. Otherwise they will be double-counted. """ - query = """ - WITH - proj_county_assoc as ( - SELECT - project_id, - locs.county_id_fips, - -- Note that "frac_locations_in_county" is a misnomer. It is actually - -- "fraction_of_locations_represented_by_this_row". 
When I originally - -- named it, I thought location:county was m:1, but it's actually m:m - -- because some projects list multiple towns in the same parent county - -- in the raw "county" field. - (1.0 / count(*) over (partition by project_id))::real as frac_locations_in_county - FROM data_warehouse.offshore_wind_cable_landing_association as cable - INNER JOIN data_warehouse.offshore_wind_locations as locs - USING(location_id) - ) - -- join the project, state, and county stuff - SELECT - proj.project_id, - assoc.county_id_fips, - -- projects with missing location info get full capacity allocation - CASE WHEN assoc.frac_locations_in_county IS NULL - THEN 1.0 - ELSE assoc.frac_locations_in_county - END as frac_locations_in_county, - substr(assoc.county_id_fips, 1, 2) as state_id_fips, - - proj.name as project_name, - proj.developer, - proj."capacity_mw", - date(proj.proposed_completion_year::text || '-01-01') as date_proposed_online, - proj.queue_status, - 'Offshore Wind' as resource_clean, - 0.0 as co2e_tonnes_per_year, - proj.is_actionable, - proj.is_nearly_certain, - 'proprietary' as source, - - sfip.state_name as state, - cfip.county_name as county, - ncsl.permitting_type as state_permitting_type - - FROM data_warehouse.offshore_wind_projects as proj - LEFT JOIN proj_county_assoc as assoc - USING(project_id) - LEFT JOIN data_warehouse.state_fips as sfip - ON substr(assoc.county_id_fips, 1, 2) = sfip.state_id_fips - LEFT JOIN data_warehouse.county_fips as cfip - USING(county_id_fips) - LEFT JOIN data_warehouse.ncsl_state_permitting as ncsl - ON substr(assoc.county_id_fips, 1, 2) = ncsl.state_id_fips - ; - """ + query = get_query("get_proprietary_proposed_offshore.sql") df = pd.read_sql(query, engine) return df @@ -995,79 +814,7 @@ def get_eia860m_current(engine: sa.engine.Engine) -> pd.DataFrame: Args: engine (sa.engine.Engine): connection to the data warehouse database """ - query = """ - WITH - -- 3307 / 33943 current projects are missing a balancing authority 
code (mostly - -- retired projects). But a simple county lookup can fill in half (1642 / 3307) of them: - -- 1932 / 2400 counties with a project missing a BA code have a single unique - -- BA code among the other projects in that county. These are a pretty safe bet - -- to impute. Counties with multiple or zero BAs are not imputed. - imputed_bal_auth AS ( - SELECT - county_id_fips, - max(balancing_authority_code_eia) as unique_ba -- only one unique value - FROM data_warehouse.pudl_eia860m_changelog - WHERE valid_until_date = ( - select max(valid_until_date) FROM data_warehouse.pudl_eia860m_changelog - ) - GROUP BY 1 - HAVING count(distinct balancing_authority_code_eia) = 1 - ) - SELECT - report_date, - plant_id_eia, - plant_name_eia, - utility_id_eia, - utility_name_eia, - generator_id, - capacity_mw, - eia.state_id_fips, - eia.county_id_fips, - sfips.state_name as state, - cfips.county_name as county, - -- 1. Impute BA codes - -- 2. Convert EIA ISO abbreviations to match those used in LBNL/GridStatus - -- 3. 
Name it iso_region for consistency with LBNL/GridStatus ISO queues - CASE COALESCE(balancing_authority_code_eia, imputed_ba.unique_ba) - WHEN 'CISO' THEN 'CAISO' - WHEN 'ERCO' THEN 'ERCOT' - WHEN 'ISNE' THEN 'ISONE' - WHEN 'NYIS' THEN 'NYISO' - WHEN 'SWPP ' THEN 'SPP' - -- MISO and PJM are unchanged - ELSE COALESCE(balancing_authority_code_eia, imputed_ba.unique_ba) - END as iso_region, - current_planned_generator_operating_date, - energy_source_code_1, - prime_mover_code, - energy_storage_capacity_mwh, - fuel_type_code_pudl, - generator_retirement_date, - latitude, - longitude, - operational_status_code, - operational_status AS operational_status_category, - raw_operational_status_code, - planned_derate_date, - planned_generator_retirement_date, - planned_net_summer_capacity_derate_mw, - planned_net_summer_capacity_uprate_mw, - planned_uprate_date, - technology_description, - state as raw_state, - county as raw_county - FROM data_warehouse.pudl_eia860m_changelog as eia - LEFT JOIN data_warehouse.state_fips as sfips - USING (state_id_fips) - LEFT JOIN data_warehouse.county_fips as cfips - USING (county_id_fips) - LEFT JOIN imputed_bal_auth as imputed_ba - ON eia.county_id_fips = imputed_ba.county_id_fips - WHERE valid_until_date = ( - select max(valid_until_date) FROM data_warehouse.pudl_eia860m_changelog - ) - ORDER BY plant_id_eia, generator_id - """ + query = get_query("get_eia860m_current.sql") current_projects = pd.read_sql(query, engine) return current_projects diff --git a/src/dbcp/data_mart/sql_queries/get_eia860m_current.sql b/src/dbcp/data_mart/sql_queries/get_eia860m_current.sql new file mode 100644 index 00000000..d61fcac3 --- /dev/null +++ b/src/dbcp/data_mart/sql_queries/get_eia860m_current.sql @@ -0,0 +1,71 @@ + WITH + -- 3307 / 33943 current projects are missing a balancing authority code (mostly + -- retired projects). 
But a simple county lookup can fill in half (1642 / 3307) of them: + -- 1932 / 2400 counties with a project missing a BA code have a single unique + -- BA code among the other projects in that county. These are a pretty safe bet + -- to impute. Counties with multiple or zero BAs are not imputed. + imputed_bal_auth AS ( + SELECT + county_id_fips, + max(balancing_authority_code_eia) as unique_ba -- only one unique value + FROM data_warehouse.pudl_eia860m_changelog + WHERE valid_until_date = ( + select max(valid_until_date) FROM data_warehouse.pudl_eia860m_changelog + ) + GROUP BY 1 + HAVING count(distinct balancing_authority_code_eia) = 1 + ) + SELECT + report_date, + plant_id_eia, + plant_name_eia, + utility_id_eia, + utility_name_eia, + generator_id, + capacity_mw, + eia.state_id_fips, + eia.county_id_fips, + sfips.state_name as state, + cfips.county_name as county, + -- 1. Impute BA codes + -- 2. Convert EIA ISO abbreviations to match those used in LBNL/GridStatus + -- 3. Name it iso_region for consistency with LBNL/GridStatus ISO queues + CASE COALESCE(balancing_authority_code_eia, imputed_ba.unique_ba) + WHEN 'CISO' THEN 'CAISO' + WHEN 'ERCO' THEN 'ERCOT' + WHEN 'ISNE' THEN 'ISONE' + WHEN 'NYIS' THEN 'NYISO' + WHEN 'SWPP ' THEN 'SPP' + -- MISO and PJM are unchanged + ELSE COALESCE(balancing_authority_code_eia, imputed_ba.unique_ba) + END as iso_region, + current_planned_generator_operating_date, + energy_source_code_1, + prime_mover_code, + energy_storage_capacity_mwh, + fuel_type_code_pudl, + generator_retirement_date, + latitude, + longitude, + operational_status_code, + operational_status AS operational_status_category, + raw_operational_status_code, + planned_derate_date, + planned_generator_retirement_date, + planned_net_summer_capacity_derate_mw, + planned_net_summer_capacity_uprate_mw, + planned_uprate_date, + technology_description, + state as raw_state, + county as raw_county + FROM data_warehouse.pudl_eia860m_changelog as eia + LEFT JOIN 
data_warehouse.state_fips as sfips + USING (state_id_fips) + LEFT JOIN data_warehouse.county_fips as cfips + USING (county_id_fips) + LEFT JOIN imputed_bal_auth as imputed_ba + ON eia.county_id_fips = imputed_ba.county_id_fips + WHERE valid_until_date = ( + select max(valid_until_date) FROM data_warehouse.pudl_eia860m_changelog + ) + ORDER BY plant_id_eia, generator_id diff --git a/src/dbcp/data_mart/sql_queries/get_energy_community_qualification.sql b/src/dbcp/data_mart/sql_queries/get_energy_community_qualification.sql new file mode 100644 index 00000000..5c789676 --- /dev/null +++ b/src/dbcp/data_mart/sql_queries/get_energy_community_qualification.sql @@ -0,0 +1,15 @@ +WITH + ec as ( + SELECT + ec.county_id_fips, + coal_qualifying_area_fraction as energy_community_coal_closures_area_fraction, + qualifies_by_employment_criteria as energy_community_qualifies_via_employment + FROM data_warehouse.energy_communities_by_county AS ec + LEFT JOIN data_warehouse.county_fips AS fips + USING (county_id_fips) + ) + SELECT + *, + (energy_community_coal_closures_area_fraction > 0.5 OR + energy_community_qualifies_via_employment) as energy_community_qualifies + FROM ec diff --git a/src/dbcp/data_mart/sql_queries/get_existing_plant_attributes.sql b/src/dbcp/data_mart/sql_queries/get_existing_plant_attributes.sql new file mode 100644 index 00000000..385eb25a --- /dev/null +++ b/src/dbcp/data_mart/sql_queries/get_existing_plant_attributes.sql @@ -0,0 +1,44 @@ +WITH + plant_fuel_aggs as ( + SELECT + plant_id_eia, + (CASE + WHEN technology_description = 'Batteries' THEN 'Battery Storage' + WHEN technology_description = 'Offshore Wind Turbine' THEN 'Offshore Wind' + WHEN fuel_type_code_pudl = 'waste' THEN 'other' + ELSE fuel_type_code_pudl + END + ) as resource, + sum(net_generation_mwh) as net_gen_by_fuel, + sum(capacity_mw) as capacity_by_fuel, + max(generator_operating_date) as max_operating_date + from data_warehouse.pudl_generators + where operational_status = 'existing' + 
group by 1, 2 + ), + plant_capacity as ( + SELECT + plant_id_eia, + sum(capacity_by_fuel) as capacity_mw + from plant_fuel_aggs + group by 1 + ), + all_aggs as ( + SELECT + * + from plant_fuel_aggs as pfuel + LEFT JOIN plant_capacity as pcap + USING (plant_id_eia) + ) + -- select fuel type with the largest generation (with capacity as tiebreaker) + -- https://stackoverflow.com/questions/3800551/select-first-row-in-each-group-by-group/7630564 + -- NOTE: this is not appropriate for fields that require aggregation, hence CTEs above + SELECT DISTINCT ON (plant_id_eia) + plant_id_eia, + resource, + -- net_gen_by_fuel for debugging + max_operating_date, + capacity_mw + from all_aggs + ORDER BY plant_id_eia, net_gen_by_fuel DESC NULLS LAST, capacity_by_fuel DESC NULLS LAST + ; diff --git a/src/dbcp/data_mart/sql_queries/get_gridstatus_projects.sql b/src/dbcp/data_mart/sql_queries/get_gridstatus_projects.sql new file mode 100644 index 00000000..76705d67 --- /dev/null +++ b/src/dbcp/data_mart/sql_queries/get_gridstatus_projects.sql @@ -0,0 +1,63 @@ +WITH + proj_res AS ( + SELECT + queue_id, + is_nearly_certain, + project_id, + project_name, + capacity_mw, + developer, + entity, + entity AS iso_region, -- these are different in non-ISO data from LBNL + utility, + proposed_completion_date AS date_proposed_online, + point_of_interconnection, + is_actionable, + resource_clean, + queue_status, + queue_date AS date_entered_queue, + actual_completion_date, + withdrawn_date, + interconnection_status_raw AS interconnection_status + FROM data_warehouse.gridstatus_projects as proj + LEFT JOIN data_warehouse.gridstatus_resource_capacity as res + USING (project_id) + WHERE resource_clean != 'Transmission' + ), + loc as ( + -- projects can have multiple locations, though 99 percent have only one. + -- Can multiply capacity by frac_locations_in_county to allocate it equally. + -- Note that there are some duplicates of (project_id, county_id_fips) as well. 
+ -- This happens when the original data lists multiple city names that are in the + -- same county. This does not cause double counting because of frac_locations_in_county. + SELECT + project_id, + state_id_fips, + county_id_fips, + (1.0 / count(*) over (partition by project_id))::real as frac_locations_in_county + FROM data_warehouse.gridstatus_locations + ), + gs as ( + SELECT + proj_res.*, + loc.state_id_fips, + loc.county_id_fips, + -- projects with missing location info get full capacity allocation + coalesce(loc.frac_locations_in_county, 1.0) as frac_locations_in_county + FROM proj_res + LEFT JOIN loc + USING (project_id) + ) + SELECT + sfip.state_name AS state, + cfip.county_name AS county, + gs.*, + 'gridstatus' AS source, + ncsl.permitting_type AS state_permitting_type + FROM gs + LEFT JOIN data_warehouse.ncsl_state_permitting AS ncsl + on gs.state_id_fips = ncsl.state_id_fips + LEFT JOIN data_warehouse.state_fips AS sfip + ON gs.state_id_fips = sfip.state_id_fips + LEFT JOIN data_warehouse.county_fips AS cfip + ON gs.county_id_fips = cfip.county_id_fips diff --git a/src/dbcp/data_mart/sql_queries/get_lbnl_projects.sql b/src/dbcp/data_mart/sql_queries/get_lbnl_projects.sql new file mode 100644 index 00000000..aca2a270 --- /dev/null +++ b/src/dbcp/data_mart/sql_queries/get_lbnl_projects.sql @@ -0,0 +1,65 @@ +WITH + iso_proj_res as ( + SELECT + proj.project_id, + proj.queue_id, + proj.date_proposed as date_proposed_online, + proj.developer, + proj.entity, + proj.interconnection_status_lbnl as interconnection_status, + proj.point_of_interconnection, + proj.project_name, + proj.queue_date as date_entered_queue, + proj.queue_status, + proj.region as iso_region, + proj.utility, + proj.is_actionable, + proj.is_nearly_certain, + proj.actual_completion_date, + proj.withdrawn_date, + res.capacity_mw, + res.resource_clean + FROM data_warehouse.iso_projects as proj + INNER JOIN data_warehouse.iso_resource_capacity as res + ON proj.project_id = res.project_id + ), + 
loc as ( + -- Remember that projects can have multiple locations, though 99 percent have only one. + -- Can optionally multiply capacity by frac_locations_in_county to allocate it equally. + -- Note that there are some duplicates of (project_id, county_id_fips) as well. + -- This happens when the original data lists multiple city names that are in the + -- same county. This does not cause double counting because of frac_locations_in_county. + SELECT + project_id, + state_id_fips, + county_id_fips, + raw_county_name, -- for validation only + (1.0 / count(*) over (partition by project_id))::real as frac_locations_in_county + FROM data_warehouse.iso_locations + ), + iso as ( + SELECT + iso_proj_res.*, + loc.state_id_fips, + loc.county_id_fips, + loc.raw_county_name, -- for validation only + -- projects with missing location info get full capacity allocation + coalesce(loc.frac_locations_in_county, 1.0) as frac_locations_in_county + from iso_proj_res + LEFT JOIN loc + ON iso_proj_res.project_id = loc.project_id + ) + SELECT + sfip.state_name as state, + cfip.county_name as county, + iso.*, + 'lbnl' as source, + ncsl.permitting_type as state_permitting_type + from iso + left join data_warehouse.state_fips as sfip + on iso.state_id_fips = sfip.state_id_fips + left join data_warehouse.county_fips as cfip + on iso.county_id_fips = cfip.county_id_fips + left join data_warehouse.ncsl_state_permitting as ncsl + on iso.state_id_fips = ncsl.state_id_fips + ; diff --git a/src/dbcp/data_mart/sql_queries/get_offshore_wind_extra_cols.sql b/src/dbcp/data_mart/sql_queries/get_offshore_wind_extra_cols.sql new file mode 100644 index 00000000..126f84f2 --- /dev/null +++ b/src/dbcp/data_mart/sql_queries/get_offshore_wind_extra_cols.sql @@ -0,0 +1,42 @@ + WITH + proj_ports AS ( + SELECT + proj."project_id", + port.location_id, + locs.county_id_fips, + proj."capacity_mw" + FROM "data_warehouse"."offshore_wind_projects" as proj + INNER JOIN data_warehouse.offshore_wind_port_association as 
port + USING(project_id) + INNER JOIN data_warehouse.offshore_wind_locations as locs + USING(location_id) + ), + -- select * from proj_ports + -- order by project_id, location_id + port_aggs AS ( + SELECT + county_id_fips, + -- intentional double-counting here. The theory is that the loss + -- of any port could block the whole associated project, so we want + -- to know how much total capacity is at stake in each port county. + SUM(capacity_mw) as offshore_wind_capacity_mw_via_ports + FROM proj_ports + GROUP BY 1 + order by 1 + ), + interest AS ( + SELECT + county_id_fips, + string_agg(distinct(why_of_interest), ',' order by why_of_interest) as offshore_wind_interest_type + FROM data_warehouse.offshore_wind_locations as locs + GROUP BY 1 + ORDER BY 1 + ) + SELECT + county_id_fips, + offshore_wind_capacity_mw_via_ports, + offshore_wind_interest_type + FROM interest + LEFT JOIN port_aggs + USING(county_id_fips) + where county_id_fips is not NULL; diff --git a/src/dbcp/data_mart/sql_queries/get_proposed_fossil_infra.sql b/src/dbcp/data_mart/sql_queries/get_proposed_fossil_infra.sql new file mode 100644 index 00000000..fb04a4c9 --- /dev/null +++ b/src/dbcp/data_mart/sql_queries/get_proposed_fossil_infra.sql @@ -0,0 +1,59 @@ +WITH + projects as ( + SELECT + project_id, + -- First multiplier below is unit conversion + -- The second is 15 percent haircut to account for realistic utilization, as per design doc. + greenhouse_gases_co2e_tpy * 0.907185 * 0.85 as co2e_tonnes_per_year + FROM data_warehouse.eip_projects + WHERE operating_status not in ('Operating', 'Under construction', 'Canceled') + ), + facilities as ( + SELECT + facility_id, + county_id_fips, + state_id_fips + FROM data_warehouse.eip_facilities + ), + association as ( + -- this query simplifies the m:m relationship + -- by taking only the first result, making it m:1. + -- Only 5 rows are dropped. 
+ select DISTINCT ON (project_id) + project_id, + facility_id + from data_warehouse.eip_facility_project_association + order by 1, 2 DESC + ), + proj_agg_to_facility as ( + SELECT + ass.facility_id, + sum(co2e_tonnes_per_year) as co2e_tonnes_per_year + FROM projects as proj + LEFT JOIN association as ass + ON proj.project_id = ass.project_id + GROUP BY 1 + ), + facility_aggs as ( + SELECT + fac.*, + proj.co2e_tonnes_per_year + FROM proj_agg_to_facility as proj + LEFT JOIN facilities as fac + ON proj.facility_id = fac.facility_id + ), + w_county_names as ( + select + cfip.county_name as county, + proj.* + from facility_aggs as proj + left join data_warehouse.county_fips as cfip + on proj.county_id_fips = cfip.county_id_fips + ) + SELECT + sfip.state_name as state, + proj.* + from w_county_names as proj + left join data_warehouse.state_fips as sfip + on proj.state_id_fips = sfip.state_id_fips + ; diff --git a/src/dbcp/data_mart/sql_queries/get_proposed_fossil_plants.sql b/src/dbcp/data_mart/sql_queries/get_proposed_fossil_plants.sql new file mode 100644 index 00000000..7aaad984 --- /dev/null +++ b/src/dbcp/data_mart/sql_queries/get_proposed_fossil_plants.sql @@ -0,0 +1,38 @@ +WITH + active_loc as ( + select + proj.project_id, + loc.county_id_fips + from data_warehouse.iso_projects as proj + left join data_warehouse.iso_locations as loc + on loc.project_id = proj.project_id + where proj.queue_status = 'active' + ), + projects as ( + select + loc.project_id, + loc.county_id_fips, + res.capacity_mw, + res.resource_clean as resource + from active_loc as loc + left join data_warehouse.iso_resource_capacity as res + on res.project_id = loc.project_id + where res.capacity_mw is not NULL + and res.resource_clean in ('Natural Gas', 'Coal', 'Oil') + ), + w_county_names as ( + select + cfip.county_name as county, + cfip.state_id_fips, + proj.* + from projects as proj + left join data_warehouse.county_fips as cfip + on proj.county_id_fips = cfip.county_id_fips + ) + SELECT + 
sfip.state_name as state, + proj.* + from w_county_names as proj + left join data_warehouse.state_fips as sfip + on proj.state_id_fips = sfip.state_id_fips + ; diff --git a/src/dbcp/data_mart/sql_queries/get_proposed_infra_projects.sql b/src/dbcp/data_mart/sql_queries/get_proposed_infra_projects.sql new file mode 100644 index 00000000..17390458 --- /dev/null +++ b/src/dbcp/data_mart/sql_queries/get_proposed_infra_projects.sql @@ -0,0 +1,182 @@ + WITH + projects as ( + SELECT + project_id, + name as project_name, + classification as project_classification, + cost_millions, + date_modified, + industry_sector, + project_description, + raw_project_type, + raw_number_of_jobs_promised, + -- First multiplier below is unit conversion + -- The second is 15 percent haircut to account for realistic utilization, as per design doc. + greenhouse_gases_co2e_tpy * 0.907185 * 0.85 as co2e_tonnes_per_year, + volatile_organic_compounds_voc_tpy * 0.907185 * 0.85 as voc_tonnes_per_year, + sulfur_dioxide_so2_tpy * 0.907185 * 0.85 as so2_tonnes_per_year, + nitrogen_oxides_nox_tpy * 0.907185 * 0.85 as nox_tonnes_per_year, + carbon_monoxide_co_tpy * 0.907185 * 0.85 as co_tonnes_per_year, + particulate_matter_pm2_5_tpy * 0.907185 * 0.85 as pm2_5_tonnes_per_year, + total_wetlands_affected_permanently_acres, + total_wetlands_affected_temporarily_acres, + is_ally_target, + operating_status + FROM data_warehouse.eip_projects + WHERE operating_status not in ('Operating', 'Under construction', 'Canceled') + ), + facilities as ( + SELECT + facility_id, + county_id_fips, + state_id_fips, + name as facility_name, + latitude, + longitude, + -- concat with separator + concat_ws(', ', raw_street_address, raw_city, raw_zip_code) as raw_street_address, + facility_description, + raw_estimated_population_within_3_miles, + raw_percent_low_income_within_3_miles, + raw_percent_people_of_color_within_3_miles, + raw_respiratory_hazard_index as raw_respiratory_hazard_index_within_3_miles, + 
raw_air_toxics_cancer_risk_nata_cancer_risk as raw_relative_cancer_risk_per_million_within_3_miles, + raw_wastewater_discharge_indicator + FROM data_warehouse.eip_facilities + ), + permits as ( + SELECT + air_construction_id, + description_or_purpose as permit_description + -- The following would be good additions BUT I'd need to + -- fix the 1:m project:permit association first. + -- Need to take only the most recent permit. + -- This is perfectly doable but deferring for time. + --permit_status, + --raw_deadline_to_begin_construction, + --raw_last_day_to_comment + from data_warehouse.eip_air_constr_permits + ), + proj_fac_association as ( + -- this query simplifies the m:m relationship + -- by taking only the first result, making it m:1. + -- Only 6 / 681 rows are dropped. + select DISTINCT ON (project_id) + project_id, + facility_id + from data_warehouse.eip_facility_project_association + ), + proj_permit_association as ( + -- this query simplifies the m:m relationship + -- by taking only the first result, making it m:1. + -- 276 / 831 rows are dropped (lots of permits). + -- This method should be replaced by most recent permit. 
+ select DISTINCT ON (project_id) + project_id, + air_construction_id + from data_warehouse.eip_project_permit_association + ), + proj_facility_id as ( + SELECT + proj.*, + ass.facility_id + FROM projects as proj + LEFT JOIN proj_fac_association as ass + ON proj.project_id = ass.project_id + ), + proj_facility as ( + SELECT + proj.*, + -- everything except fac.facility_id (duplicated with proj.facility_id) + fac.county_id_fips, + fac.state_id_fips, + fac.facility_name, + fac.latitude, + fac.longitude, + fac.raw_street_address, + fac.facility_description, + fac.raw_estimated_population_within_3_miles, + fac.raw_percent_low_income_within_3_miles, + fac.raw_percent_people_of_color_within_3_miles, + fac.raw_respiratory_hazard_index_within_3_miles, + fac.raw_relative_cancer_risk_per_million_within_3_miles, + fac.raw_wastewater_discharge_indicator + FROM proj_facility_id as proj + LEFT JOIN facilities as fac + ON proj.facility_id = fac.facility_id + ), + proj_fac_permit_id as ( + SELECT + proj.*, + ass.air_construction_id + FROM proj_facility as proj + LEFT JOIN proj_permit_association as ass + ON proj.project_id = ass.project_id + ), + proj_fac_perm as ( + SELECT + proj.*, + -- everything except perm.air_construction_id (duplicated with proj.air_construction_id) + perm.permit_description + FROM proj_fac_permit_id as proj + LEFT JOIN permits as perm + ON proj.air_construction_id = perm.air_construction_id + ), + w_county_names as ( + select + cfip.county_name as county, + proj.* + from proj_fac_perm as proj + left join data_warehouse.county_fips as cfip + on proj.county_id_fips = cfip.county_id_fips + ), + final as( + SELECT + sfip.state_name as state, + proj.* + from w_county_names as proj + left join data_warehouse.state_fips as sfip + on proj.state_id_fips = sfip.state_id_fips + ) + SELECT + -- reorder column names to be more friendly + project_id, + project_name, + state, + county, + county_id_fips, + state_id_fips, + latitude, + longitude, + raw_street_address, + 
air_construction_id, + facility_id, + facility_name, + project_classification, + operating_status, + industry_sector, + raw_project_type, + project_description, + facility_description, + permit_description, + cost_millions, + raw_number_of_jobs_promised, + date_modified, + co2e_tonnes_per_year, + voc_tonnes_per_year, + so2_tonnes_per_year, + nox_tonnes_per_year, + co_tonnes_per_year, + pm2_5_tonnes_per_year, + total_wetlands_affected_permanently_acres, + total_wetlands_affected_temporarily_acres, + raw_estimated_population_within_3_miles, + raw_percent_low_income_within_3_miles, + raw_percent_people_of_color_within_3_miles, + raw_respiratory_hazard_index_within_3_miles, + raw_relative_cancer_risk_per_million_within_3_miles, + raw_wastewater_discharge_indicator, + is_ally_target + FROM final + ORDER BY 2, 1 + ; diff --git a/src/dbcp/data_mart/sql_queries/get_proprietary_proposed_offshore.sql b/src/dbcp/data_mart/sql_queries/get_proprietary_proposed_offshore.sql new file mode 100644 index 00000000..5833aa4a --- /dev/null +++ b/src/dbcp/data_mart/sql_queries/get_proprietary_proposed_offshore.sql @@ -0,0 +1,51 @@ +WITH + proj_county_assoc as ( + SELECT + project_id, + locs.county_id_fips, + -- Note that "frac_locations_in_county" is a misnomer. It is actually + -- "fraction_of_locations_represented_by_this_row". When I originally + -- named it, I thought location:county was m:1, but it's actually m:m + -- because some projects list multiple towns in the same parent county + -- in the raw "county" field. 
+ (1.0 / count(*) over (partition by project_id))::real as frac_locations_in_county + FROM data_warehouse.offshore_wind_cable_landing_association as cable + INNER JOIN data_warehouse.offshore_wind_locations as locs + USING(location_id) + ) + -- join the project, state, and county stuff + SELECT + proj.project_id, + assoc.county_id_fips, + -- projects with missing location info get full capacity allocation + CASE WHEN assoc.frac_locations_in_county IS NULL + THEN 1.0 + ELSE assoc.frac_locations_in_county + END as frac_locations_in_county, + substr(assoc.county_id_fips, 1, 2) as state_id_fips, + + proj.name as project_name, + proj.developer, + proj."capacity_mw", + date(proj.proposed_completion_year::text || '-01-01') as date_proposed_online, + proj.queue_status, + 'Offshore Wind' as resource_clean, + 0.0 as co2e_tonnes_per_year, + proj.is_actionable, + proj.is_nearly_certain, + 'proprietary' as source, + + sfip.state_name as state, + cfip.county_name as county, + ncsl.permitting_type as state_permitting_type + + FROM data_warehouse.offshore_wind_projects as proj + LEFT JOIN proj_county_assoc as assoc + USING(project_id) + LEFT JOIN data_warehouse.state_fips as sfip + ON substr(assoc.county_id_fips, 1, 2) = sfip.state_id_fips + LEFT JOIN data_warehouse.county_fips as cfip + USING(county_id_fips) + LEFT JOIN data_warehouse.ncsl_state_permitting as ncsl + ON substr(assoc.county_id_fips, 1, 2) = ncsl.state_id_fips + ; From 37457fd2bc7ec6b440a5d3a825c217378e16297f Mon Sep 17 00:00:00 2001 From: bendnorman Date: Tue, 12 Nov 2024 18:06:13 -0900 Subject: [PATCH 11/25] Flesh out get_query doc string --- src/dbcp/data_mart/helpers.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/dbcp/data_mart/helpers.py b/src/dbcp/data_mart/helpers.py index c4038d6d..98bef889 100644 --- a/src/dbcp/data_mart/helpers.py +++ b/src/dbcp/data_mart/helpers.py @@ -346,7 +346,25 @@ def _estimate_proposed_power_co2e( def get_query(filename: str) -> str: - 
"""Get the query from a file.""" + """ + Get the query from a file. + + To avoid having to write long queries in Python, we store them in separate files + in the data_mart/sql_queries directory. To use them, call this function with the + filename of the query you want to use. + + Args: + filename: name of the file in the sql_queries directory with the .sql extension + Returns: + the query as a string + Example: + >>> import pandas as pd + >>> from dbcp.data_mart.helpers import get_query + >>> from dbcp.helpers import get_sql_engine + >>> engine = get_sql_engine() + >>> query = get_query("get_proposed_infra_projects.sql") + >>> df = pd.read_sql(query, engine) + """ sql_query_dir = Path(__file__).parent / "sql_queries" full_path = sql_query_dir / filename return full_path.read_text() From a3650d27ac324643428066ad73ed709658985252 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Tue, 12 Nov 2024 18:38:16 -0900 Subject: [PATCH 12/25] Add required packages that were in PUDL --- Dockerfile | 2 +- requirements.txt | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 1c719378..e457130a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ # set base image (host OS) FROM python:3.10 -RUN apt-get update && apt-get install sqlite3 +RUN apt-get -y update && apt-get -y install sqlite3 RUN apt-get -y install libgdal-dev RUN useradd -d /app/ dbcp diff --git a/requirements.txt b/requirements.txt index 6d2af04d..2072b9a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,9 @@ plotly~=5.15.0 gridstatus~=0.20.0 s3fs>=2022.11.0 click>=8.1.7 +numpy<2 +sqlalchemy<2 +coloredlogs~=15.0.1 +scipy~=1.14.1 +google-cloud-storage~=2.18.2 +geopandas~=1.0.1 From 03aa1c2281ac4276daa607f39ed95f4d72f26422 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Wed, 13 Nov 2024 17:48:47 -0900 Subject: [PATCH 13/25] Fix iso queue change log end of quarter edge case and init directory for data update notebooks --- 
.../gridstatus/quarterly_update.ipynb | 970 ++++++++++++++++++ notebooks/data_updates/outputs/.gitignore | 4 + src/dbcp/data_mart/projects.py | 5 +- 3 files changed, 977 insertions(+), 2 deletions(-) create mode 100644 notebooks/data_updates/gridstatus/quarterly_update.ipynb create mode 100644 notebooks/data_updates/outputs/.gitignore diff --git a/notebooks/data_updates/gridstatus/quarterly_update.ipynb b/notebooks/data_updates/gridstatus/quarterly_update.ipynb new file mode 100644 index 00000000..ef66429a --- /dev/null +++ b/notebooks/data_updates/gridstatus/quarterly_update.ipynb @@ -0,0 +1,970 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "537fc080-51d5-43fb-8780-33ac2c2a5228", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/app/.local/lib/python3.10/site-packages/geopandas/_compat.py:123: UserWarning: The Shapely GEOS version (3.11.4-CAPI-1.17.4) is incompatible with the GEOS version PyGEOS was compiled with (3.10.3-CAPI-1.16.1). Conversions between both will be slow.\n", + " warnings.warn(\n", + "/app/.local/lib/python3.10/site-packages/pudl/analysis/spatial.py:7: UserWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas will still use PyGEOS by default for now. To force to use and test Shapely 2.0, you have to set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas:\n", + "\n", + "import os\n", + "os.environ['USE_PYGEOS'] = '0'\n", + "import geopandas\n", + "\n", + "In a future release, GeoPandas will switch to using Shapely by default. 
If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).\n", + " import geopandas as gpd\n" + ] + }, + { + "data": { + "text/plain": [ + "{'miso': '1728242350923420',\n", + " 'miso-pre-2017': '1709776311574737',\n", + " 'caiso': '1728242351254356',\n", + " 'pjm': '1728242351606642',\n", + " 'ercot': '1728242351929200',\n", + " 'spp': '1728242352244156',\n", + " 'nyiso': '1728242352584485',\n", + " 'isone': '1728242352913470'}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import dbcp\n", + "from dbcp.extract.gridstatus_isoqueues import ISO_QUEUE_VERSIONS\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "8e7c16a6-ad86-40ef-b5cd-3a10e07d112f", + "metadata": {}, + "source": [ + "## Get latest generation number for archives\n", + "Each time the [gridstatus archiver](https://github.com/deployment-gap-model-education-fund/deployment-gap-model-archiver) is run, GCS creates a new generation number for the new version of the data. The following code grabs the latest generation number of the interconnection queue data for each ISO." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 159, + "id": "a89c74f3-79e6-4d52-b0d9-70b04544b417", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-10-06 19:19:11.021000+00:00\n", + "2024-10-06 19:19:11.349000+00:00\n", + "2024-10-06 19:19:11.701000+00:00\n", + "2024-10-06 19:19:12.020000+00:00\n", + "2024-10-06 19:19:12.340000+00:00\n", + "2024-10-06 19:19:12.683000+00:00\n", + "2024-10-06 19:19:13.005000+00:00\n" + ] + } + ], + "source": [ + "from google.cloud import storage\n", + "from datetime import datetime\n", + "\n", + "def get_generation_number_closest_to_date(bucket_name, blob_name, target_date):\n", + " client = storage.Client()\n", + " bucket = client.bucket(bucket_name)\n", + "\n", + " # Enable listing versions of the blob\n", + " blobs = bucket.list_blobs(prefix=blob_name, versions=True)\n", + "\n", + " # Filter and sort blobs by time difference\n", + " closest_blob = None\n", + " min_time_diff = float('inf')\n", + " target_timestamp = target_date.timestamp()\n", + " \n", + " blobs = sorted(blobs, key=lambda blob: blob.updated.timestamp())\n", + "\n", + " for blob in blobs:\n", + " # Get the last modified time and calculate difference from target date\n", + " last_modified = blob.updated.timestamp()\n", + "\n", + " if target_timestamp < last_modified:\n", + " print(blob.updated)\n", + " return str(blob.generation)\n", + "\n", + "# Example usage\n", + "bucket_name = \"dgm-archive\"\n", + "target_date = datetime(2024, 9, 30) # Specify the date you want to find closest to\n", + "\n", + "updated_iso_queue_version = ISO_QUEUE_VERSIONS.copy()\n", + "\n", + "for iso_region in ISO_QUEUE_VERSIONS.keys():\n", + " if iso_region == \"miso-pre-2017\":\n", + " continue\n", + " blob_name = f\"gridstatus/interconnection_queues/parquet/{iso_region}.parquet\"\n", + " updated_iso_queue_version[iso_region] = get_generation_number_closest_to_date(bucket_name, blob_name, target_date)\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": 160, + "id": "0a1a398a-c8c2-4b97-924b-71067067cc5d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'miso': '1728242350923420',\n", + " 'miso-pre-2017': '1709776311574737',\n", + " 'caiso': '1728242351254356',\n", + " 'pjm': '1728242351606642',\n", + " 'ercot': '1728242351929200',\n", + " 'spp': '1728242352244156',\n", + " 'nyiso': '1728242352584485',\n", + " 'isone': '1728242352913470'}" + ] + }, + "execution_count": 160, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "updated_iso_queue_version" + ] + }, + { + "cell_type": "markdown", + "id": "c337d4d8-1b94-4ec9-a568-8b531930a423", + "metadata": {}, + "source": [ + "Copy and paste the old version numbers from `dbcp.extract.gridstatus_isoqueues.ISO_QUEUE_VERSIONS`." + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "id": "4e9d8f51-ba23-4a1f-8ac7-bd25f5cdd276", + "metadata": {}, + "outputs": [], + "source": [ + "old_queue_version = {'miso': '1719774997006069',\n", + " 'miso-pre-2017': '1709776311574737',\n", + " 'caiso': '1719774997530790',\n", + " 'pjm': '1719774998059470',\n", + " 'ercot': '1719774998544416',\n", + " 'spp': '1719774998998901',\n", + " 'nyiso': '1719774999497797',\n", + " 'isone': '1719774999940225'}" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "id": "24d9143e-ee81-47e9-999a-a6c87b26e080", + "metadata": {}, + "outputs": [], + "source": [ + "old_iso_queues = dbcp.extract.gridstatus_isoqueues.extract(old_queue_version)\n", + "new_iso_queues = dbcp.extract.gridstatus_isoqueues.extract(updated_iso_queue_version)" + ] + }, + { + "cell_type": "markdown", + "id": "eefc3252-07c8-41e5-ac83-a5e91069bcdd", + "metadata": {}, + "source": [ + "## Compare max dates of raw data\n", + "The following code prints out the latest date a project entered a queue for each ISO in the old and new data. 
We should expect the latest project date in the new data to be later than that of the old data. There are currently two exceptions to this:\n", + "\n", + "1. CAISO: We haven't been able to figure out how CAISO publishes data about active projects in the interconnection queue. The CAISO data from Gridstatus rarely updates so we rely on the LBNL data.\n", + "2. PJM: PJM [is working through a backlog of projects](https://www.utilitydive.com/news/pjm-fast-track-reliability-projects-interconnection-queue-invenergy/729311/) and isn't accepting new projects until mid 2026." + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "id": "24357798-b604-4717-8179-9af8eec8a495", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "miso\n", + " - Old max date 2024-06-07 04:00:00+00:00\n", + " - New max date 2024-09-27 04:00:00+00:00\n", + "\n", + "caiso\n", + " - Old max date 2023-03-02 08:00:00\n", + " - New max date 2023-03-02 08:00:00\n", + "\n", + "pjm\n", + " - Old max date 2023-07-08 00:00:00\n", + " - New max date 2023-07-08 00:00:00\n", + "\n", + "ercot\n", + " - Old max date 2024-05-20 00:00:00\n", + " - New max date 2024-09-10 00:00:00\n", + "\n", + "spp\n", + " - Old max date 2024-05-07 00:00:00\n", + " - New max date 2024-08-02 00:00:00\n", + "\n", + "nyiso\n", + " - Old max date 2024-05-23 00:00:00\n", + " - New max date 2024-08-05 00:00:00\n", + "\n", + "isone\n", + " - Old max date 2024-06-26 00:00:00\n", + " - New max date 2024-08-21 00:00:00\n", + "\n" + ] + } + ], + "source": [ + "for iso_region in old_iso_queues.keys():\n", + " if iso_region == \"miso-pre-2017\":\n", + " continue\n", + " print(iso_region)\n", + " old_df = old_iso_queues[iso_region]\n", + " new_df = new_iso_queues[iso_region]\n", + " \n", + " old_df['Queue Date'] = pd.to_datetime(old_df['Queue Date'])\n", + " new_df['Queue Date'] = pd.to_datetime(new_df['Queue Date'])\n", + " \n", + " print(f\" - Old max date 
{old_df['Queue Date'].max()}\")\n", + " print(f\" - New max date {new_df['Queue Date'].max()}\")\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "id": "e071b81d-8672-4bc4-94df-d75f665ca800", + "metadata": {}, + "source": [ + "## Compare data mart tables\n", + "The following code compares the old and new total active capacity in regions.\n", + "\n", + "### How to grab the new data\n", + "To get the new data, replace `dbcp.extract.gridstatus_isoqueues.ISO_QUEUE_VERSIONS` with the updated generation numbers. Then run `make all`. There might be some data validation errors due to small changes in the expected number of projects. If the changes seem reasonable, just update the expected value in the assertion. If they don't seem reasonable, do some digging!\n", + "\n", + "Once the ETL successfully finishes, the new data is available in the database.\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "id": "2824fdcf-4ffb-477b-80cf-bbc91255cfc8", + "metadata": {}, + "outputs": [], + "source": [ + "from dbcp.helpers import get_sql_engine\n", + "\n", + "engine = get_sql_engine()\n", + "with engine.connect() as con:\n", + " new_iso_projects_long_format = pd.read_sql_table(\"iso_projects_long_format\", con, schema=\"data_mart\")" + ] + }, + { + "cell_type": "markdown", + "id": "6d0d8a6b-4b85-400d-81e2-161a17f43ec9", + "metadata": {}, + "source": [ + "### How to grab the old data\n", + "To grab the \"old\" data you'll need to download the latest version of the development outputs from GCS. Currently, we don't version the data that is saved to `gs://dgm-outputs/dev` so I'd recommend naming the subdirectory you save the outputs to with the github sha that was used to produce the outputs. 
You can find the appropriate git sha by viewing [this list](https://github.com/deployment-gap-model-education-fund/deployment-gap-model/actions/workflows/run-full-build.yml?query=branch%3Adev) of github action runs that produced the development data on GCS.\n", + "\n", + "From the root of this directory you can run this command to download all the development outputs:\n", + "\n", + "```bash\n", + "gsutil -m cp -r gs://dgm-outputs/dev notebooks/data_updates/outputs/{gitsha of recent dev build or current date}\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "id": "589264d6-4ba4-443c-ae97-88f2a8440d4a", + "metadata": {}, + "outputs": [], + "source": [ + "output_id = \"a0fc9e0\" # replace with the unique ID you choose to store the dev outputs locally\n", + "old_iso_projects_long_format = pd.read_parquet(f\"../outputs/{output_id}/dev/data_mart/iso_projects_long_format.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "id": "1e4ad65c-55cc-4200-bd99-80bed7d36295", + "metadata": {}, + "outputs": [], + "source": [ + "def agg_iso_projects_long_format(df):\n", + " \"\"\"Calculate some aggregate metrics for each ISO\"\"\"\n", + " agg = df.groupby(\"iso_region\").agg({\"surrogate_id\": \"count\", \"capacity_mw\": \"sum\", \"date_entered_queue\": \"max\"})\n", + " agg = agg.rename(columns={\"surrogate_id\": \"n_projects\", \"capacity_mw\": \"total_capacity_mw\", \"date_entered_queue\": \"max_date_entered_queue\"})\n", + " return agg\n", + "\n", + "old_project_agg = agg_iso_projects_long_format(old_iso_projects_long_format)\n", + "new_project_agg = agg_iso_projects_long_format(new_iso_projects_long_format)" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "id": "b795b90f-a4c4-44ba-9465-e557b623e437", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "iso_region\n", + "CAISO 2023-04-17 00:00:00\n", + "ERCOT 2024-09-10 00:00:00\n", + "ISONE 2024-08-21 00:00:00\n", + "MISO 2024-09-27 04:00:00\n", 
+ "NYISO 2024-08-05 00:00:00\n", + "PJM 2023-07-08 00:00:00\n", + "SPP 2024-08-02 00:00:00\n", + "Southeast (non-ISO) 2023-12-15 00:00:00\n", + "West (non-ISO) 2023-12-30 00:00:00\n", + "Name: max_date_entered_queue, dtype: datetime64[ns]" + ] + }, + "execution_count": 179, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_project_agg.max_date_entered_queue" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "id": "0ec5e6f2-2aba-4d8c-8176-d3a6f818e364", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
n_projects_oldtotal_capacity_mw_oldmax_date_entered_queue_oldn_projects_newtotal_capacity_mw_newmax_date_entered_queue_new
iso_region
CAISO1410500444.80002023-04-17 00:00:001410500444.80002023-04-17 00:00:00
ERCOT1524303194.47002024-05-20 00:00:001605322303.20002024-09-10 00:00:00
ISONE52971354.41902024-06-26 00:00:0050360720.35122024-08-21 00:00:00
MISO2226382090.33002024-06-07 04:00:002186378040.10002024-09-27 04:00:00
NYISO23727591.48002024-05-21 00:00:0019824891.90002024-08-05 00:00:00
PJM2664203395.89682023-07-08 00:00:002569203615.43182023-07-08 00:00:00
SPP624123440.96202024-05-07 00:00:00610121268.00202024-08-02 00:00:00
Southeast (non-ISO)1072136024.03002023-12-15 00:00:001072136024.03002023-12-15 00:00:00
West (non-ISO)2542488401.05002023-12-30 00:00:002542488401.05002023-12-30 00:00:00
\n", + "
" + ], + "text/plain": [ + " n_projects_old total_capacity_mw_old max_date_entered_queue_old n_projects_new total_capacity_mw_new max_date_entered_queue_new\n", + "iso_region \n", + "CAISO 1410 500444.8000 2023-04-17 00:00:00 1410 500444.8000 2023-04-17 00:00:00\n", + "ERCOT 1524 303194.4700 2024-05-20 00:00:00 1605 322303.2000 2024-09-10 00:00:00\n", + "ISONE 529 71354.4190 2024-06-26 00:00:00 503 60720.3512 2024-08-21 00:00:00\n", + "MISO 2226 382090.3300 2024-06-07 04:00:00 2186 378040.1000 2024-09-27 04:00:00\n", + "NYISO 237 27591.4800 2024-05-21 00:00:00 198 24891.9000 2024-08-05 00:00:00\n", + "PJM 2664 203395.8968 2023-07-08 00:00:00 2569 203615.4318 2023-07-08 00:00:00\n", + "SPP 624 123440.9620 2024-05-07 00:00:00 610 121268.0020 2024-08-02 00:00:00\n", + "Southeast (non-ISO) 1072 136024.0300 2023-12-15 00:00:00 1072 136024.0300 2023-12-15 00:00:00\n", + "West (non-ISO) 2542 488401.0500 2023-12-30 00:00:00 2542 488401.0500 2023-12-30 00:00:00" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "both_project_aggs = old_project_agg.merge(new_project_agg, left_index=True, right_index=True, validate=\"1:1\", suffixes=(\"_old\", \"_new\"))\n", + "both_project_aggs" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "id": "dd0a7b2f-81f2-4818-88a9-edbf10d77c55", + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate the differences between the old and new\n", + "for col in old_project_agg.columns:\n", + " if pd.api.types.is_datetime64_any_dtype(old_project_agg[col]):\n", + " continue\n", + " else:\n", + " both_project_aggs[f\"{col}_pct_diff\"] = (both_project_aggs[f\"{col}_new\"] - both_project_aggs[f\"{col}_old\"]) / both_project_aggs[f\"{col}_old\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "id": "58310980-9365-4e71-a989-55d1b4625367", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
n_projects_pct_difftotal_capacity_mw_pct_diff
iso_region
CAISO0.0000000.000000
West (non-ISO)0.0000000.000000
MISO-1.796945-1.060019
ERCOT5.3149616.302467
PJM-3.5660660.107935
Southeast (non-ISO)0.0000000.000000
SPP-2.243590-1.760323
ISONE-4.914934-14.903166
NYISO-16.455696-9.784107
\n", + "
" + ], + "text/plain": [ + " n_projects_pct_diff total_capacity_mw_pct_diff\n", + "iso_region \n", + "CAISO 0.000000 0.000000\n", + "West (non-ISO) 0.000000 0.000000\n", + "MISO -1.796945 -1.060019\n", + "ERCOT 5.314961 6.302467\n", + "PJM -3.566066 0.107935\n", + "Southeast (non-ISO) 0.000000 0.000000\n", + "SPP -2.243590 -1.760323\n", + "ISONE -4.914934 -14.903166\n", + "NYISO -16.455696 -9.784107" + ] + }, + "execution_count": 182, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "old_project_agg\n", + "\n", + "both_project_aggs.sort_values(by=\"total_capacity_mw_old\", ascending=False)[[\"n_projects_pct_diff\", \"total_capacity_mw_pct_diff\"]] * 100" + ] + }, + { + "cell_type": "markdown", + "id": "13e7273f-2f8b-49b9-86bb-3f07a4a0b83e", + "metadata": {}, + "source": [ + "We don't use Gridstatus for CAISO for the reasons stated above so we filter it out in this analysis." + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "id": "ce15a3de-05ca-44f6-bc80-478cef9af017", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
n_projects_oldtotal_capacity_mw_oldmax_date_entered_queue_oldn_projects_newtotal_capacity_mw_newmax_date_entered_queue_newn_projects_pct_difftotal_capacity_mw_pct_diff
iso_region
ERCOT1524303194.47002024-05-20 00:00:001605322303.20002024-09-10 00:00:000.0531500.063025
ISONE52971354.41902024-06-26 00:00:0050360720.35122024-08-21 00:00:00-0.049149-0.149032
MISO2226382090.33002024-06-07 04:00:002186378040.10002024-09-27 04:00:00-0.017969-0.010600
NYISO23727591.48002024-05-21 00:00:0019824891.90002024-08-05 00:00:00-0.164557-0.097841
PJM2664203395.89682023-07-08 00:00:002569203615.43182023-07-08 00:00:00-0.0356610.001079
SPP624123440.96202024-05-07 00:00:00610121268.00202024-08-02 00:00:00-0.022436-0.017603
\n", + "
" + ], + "text/plain": [ + " n_projects_old total_capacity_mw_old max_date_entered_queue_old n_projects_new total_capacity_mw_new max_date_entered_queue_new n_projects_pct_diff total_capacity_mw_pct_diff\n", + "iso_region \n", + "ERCOT 1524 303194.4700 2024-05-20 00:00:00 1605 322303.2000 2024-09-10 00:00:00 0.053150 0.063025\n", + "ISONE 529 71354.4190 2024-06-26 00:00:00 503 60720.3512 2024-08-21 00:00:00 -0.049149 -0.149032\n", + "MISO 2226 382090.3300 2024-06-07 04:00:00 2186 378040.1000 2024-09-27 04:00:00 -0.017969 -0.010600\n", + "NYISO 237 27591.4800 2024-05-21 00:00:00 198 24891.9000 2024-08-05 00:00:00 -0.164557 -0.097841\n", + "PJM 2664 203395.8968 2023-07-08 00:00:00 2569 203615.4318 2023-07-08 00:00:00 -0.035661 0.001079\n", + "SPP 624 123440.9620 2024-05-07 00:00:00 610 121268.0020 2024-08-02 00:00:00 -0.022436 -0.017603" + ] + }, + "execution_count": 183, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dbcp.data_mart.projects import GS_REGIONS\n", + "\n", + "changed_project_aggs = both_project_aggs[both_project_aggs.index.isin(GS_REGIONS)]\n", + "\n", + "changed_project_aggs" + ] + }, + { + "cell_type": "markdown", + "id": "a3cdeb1e-48e4-4aae-aa5a-4a9c57eadd07", + "metadata": {}, + "source": [ + "Make sure there isn't an surprising change in total capacity between the old and new data. We currently don't expect the active capacity to change that much in the span of a quarter. The `max_change` value is an arbitrary number so dig into the data if something looks fishy to you.\n", + "\n", + "It's challenging to validate total capacity changes in ISOs. If there is an unexpected change, I would check the ISO's website to see if they changed their study process. For example, there was a surprising drop in active capacity in NYISO during the 2024 Q4 update. 
It turns out they [changed their study process](https://www.utilitydive.com/news/new-york-iso-reforms-interconnection-queue-launches-cluster-study/724054/) and the layout of the spreadsheet Gridstatus pulls in. Sites like S&P and Utility Dive might have relevant information." + ] + }, + { + "cell_type": "code", + "execution_count": 186, + "id": "9e058ac1-ba06-445f-b7a5-2f47b9e7f738", + "metadata": {}, + "outputs": [], + "source": [ + "mw_pct_diff = changed_project_aggs[\"total_capacity_mw_pct_diff\"].abs()\n", + "max_change = 0.2\n", + "assert mw_pct_diff.lt(max_change).all(), f\"{mw_pct_diff} substantial change in an ISO's interconneciton queue active capacity.\"" + ] + }, + { + "cell_type": "markdown", + "id": "f5c32300-1c24-4d71-8031-289484957332", + "metadata": {}, + "source": [ + "## Charts\n", + "The `iso_regions_active_projects_capacity_mw_change_log` data mart table contains historic snapshots of total active capacity in the ISO queues. Plotting the change over time is helpful for identifying issues with the data update." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 187, + "id": "0e0a0cbb-805d-4e8e-a6b2-94b0db2695a3", + "metadata": {}, + "outputs": [], + "source": [ + "with engine.connect() as con:\n", + " iso_regions_active_projects_capacity_mw_change_log = pd.read_sql_table(\"iso_regions_active_projects_capacity_mw_change_log\", con, schema=\"data_mart\")" + ] + }, + { + "cell_type": "code", + "execution_count": 188, + "id": "1416a051-132f-43e5-8a1f-e0fed1e551bc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "iso_region\n", + "CAISO 2024-09-30\n", + "ISONE 2024-09-30\n", + "MISO 2024-09-30\n", + "NYISO 2024-09-30\n", + "PJM 2027-09-30\n", + "SPP 2024-09-30\n", + "Name: report_date, dtype: datetime64[ns]" + ] + }, + "execution_count": 188, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iso_regions_active_projects_capacity_mw_change_log.groupby(\"iso_region\").report_date.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 189, + "id": "61987565-1958-4ec0-8717-1a08cbd6e7f2", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "chnglog = iso_regions_active_projects_capacity_mw_change_log.groupby([\"iso_region\", \"report_date\"]).sum().reset_index()\n", + "\n", + "for iso_region in chnglog.iso_region.unique():\n", + " iso_df = chnglog.query(\"iso_region == @iso_region\")\n", + " iso_df = iso_df[iso_df.report_date.dt.year.gt(2017) & (iso_df.report_date < \"2024-10-01\")]\n", + " iso_df = iso_df.set_index(\"report_date\")\n", + " iso_df.plot.bar(color=[\"green\", \"red\", \"grey\"], title=iso_region, stacked=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/data_updates/outputs/.gitignore b/notebooks/data_updates/outputs/.gitignore new file mode 100644 index 00000000..5e7d2734 --- /dev/null +++ b/notebooks/data_updates/outputs/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/src/dbcp/data_mart/projects.py b/src/dbcp/data_mart/projects.py index 19a14a51..6edafc96 100644 --- a/src/dbcp/data_mart/projects.py +++ b/src/dbcp/data_mart/projects.py @@ -595,8 +595,8 @@ def create_total_active_project_change_logs( ).all(), "Found rows with unexpected queue status." 
chng_log = active_iso_projects_change_log.copy() - min_date = chng_log.effective_date.min() - max_date = chng_log.effective_date.max() + min_date = chng_log.effective_date.min() - pd.offsets.QuarterBegin(startingMonth=1) + max_date = chng_log.effective_date.max() + pd.offsets.QuarterEnd(0) def generate_frequencies(start, end, min_date, max_date, freq="Q"): """ @@ -747,6 +747,7 @@ def create_project_change_log(long_format: pd.DataFrame) -> pd.DataFrame: long_format.resource_clean.eq("Unknown"), "other" ) + # Not all ISO regions have operational and withdrawn dates which are required to make a full change log. long_format = long_format[long_format["iso_region"].isin(CHANGE_LOG_REGIONS)] # make sure we are missing less than 10% of withdrawn_date From e4677a99c0cf2080d5a7c9e4fe9913984fa4b0b9 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Wed, 13 Nov 2024 18:43:59 -0900 Subject: [PATCH 14/25] Fix small geocoding issue --- src/dbcp/transform/local_opposition.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dbcp/transform/local_opposition.py b/src/dbcp/transform/local_opposition.py index a8f982f6..30d63b23 100644 --- a/src/dbcp/transform/local_opposition.py +++ b/src/dbcp/transform/local_opposition.py @@ -1,4 +1,5 @@ """Transform functions for local opposition data.""" + from typing import Dict import pandas as pd @@ -101,6 +102,7 @@ def _transform_local_ordinances(local_ord_df: pd.DataFrame) -> pd.DataFrame: "Town of Charlton (Worcester County)": "Charlton (Worcester County)", "City of Owasso (Rogers and Tulsa Counties)": "Owasso (Rogers and Tulsa Counties)", "City of Burleson (Tarrant and Johnson Counties)": "Burleson (Tarrant and Johnson Counties)", + "Montrose City (Genesee County)": "Montrose (Genesee County)", } local.loc[:, "locality"].replace(location_corrections, inplace=True) From 22434754a521d0a0692228a7e5250791363fef73 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Wed, 13 Nov 2024 23:01:15 -0900 Subject: [PATCH 15/25] Use nyiso archive that has 
cluster projects --- src/dbcp/extract/gridstatus_isoqueues.py | 2 +- src/dbcp/transform/gridstatus.py | 29 ++++++++++++++++++++---- src/dbcp/validation/tests.py | 2 +- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/dbcp/extract/gridstatus_isoqueues.py b/src/dbcp/extract/gridstatus_isoqueues.py index 655b917f..0f4bbacf 100644 --- a/src/dbcp/extract/gridstatus_isoqueues.py +++ b/src/dbcp/extract/gridstatus_isoqueues.py @@ -24,7 +24,7 @@ "pjm": "1728242351606642", "ercot": "1728242351929200", "spp": "1728242352244156", - "nyiso": "1728242352584485", + "nyiso": "1731568799445816", "isone": "1728242352913470", } diff --git a/src/dbcp/transform/gridstatus.py b/src/dbcp/transform/gridstatus.py index 5d2c1c95..7f62924e 100644 --- a/src/dbcp/transform/gridstatus.py +++ b/src/dbcp/transform/gridstatus.py @@ -478,7 +478,7 @@ def _clean_resource_type( resource_locations["county_id_fips"].isin(coastal_county_id_fips.keys()) & resource_locations.resource_clean.eq("Onshore Wind") ].project_id - expected_n_coastal_wind_projects = 85 + expected_n_coastal_wind_projects = 88 assert ( len(nyiso_coastal_wind_project_project_ids) == expected_n_coastal_wind_projects ), f"Expected {expected_n_coastal_wind_projects} NYISO coastal wind projects but found {len(nyiso_coastal_wind_project_project_ids)}" @@ -850,12 +850,31 @@ def _transform_nyiso(iso_df: pd.DataFrame) -> pd.DataFrame: "0": "Withdrawn", "15": "Partial In-Service", "P": "Pending Adoption of IP Compliance with Order 2023", # Vast majority of projects with status 'P' don't have any studies posted yet + "1C": "IR Validated/ Scoping Meeting Pending", + "2C": "Customer Engagement Window", + "3C": "Phase 1 Entry Decision Period", + "4C": "Phase 1 Study", + "5C": "Phase 2 Entry Decision Period", + "6C": "Phase 2 Study", + "7C": "Final Decision Period", + "10C": "Accepted Cost Allocation/ IA in Progress", + "11C": "IA Completed", + "12C": "Under Construction", + "13C": "In Service for Test", + "14C": "In Service 
Commercial", + "15C": "Partially In-Service/ Partially under construction", } # Categorize project status - iso_df["S"] = pd.to_numeric(iso_df["S"]).astype("Int64").astype("string") - actionable_vals = ("6", "7", "8", "9", "10") - nearly_certain_vals = ("11", "12", "13", "15") + iso_df["S"] = iso_df["S"].str.replace(r"\.0$", "", regex=True) + actionable_vals = ( + "6", + "7", + "8", + "9", + "10", + ) # TODO: What are actionable values for cluster studies? + nearly_certain_vals = ("11", "12", "13", "15", "11C", "12C", "13C", "15C") iso_df = _create_project_status_classification_from_single_column( iso_df, "S", @@ -932,7 +951,7 @@ def _normalize_project_locations(iso_df: pd.DataFrame) -> pd.DataFrame: geocoded_locations[["county_id_fips", "project_id"]].duplicated(keep=False) ] assert ( - len(duplicate_locations) <= 114 + len(duplicate_locations) <= 116 ), f"Found more duplicate locations in Grid Status location table than expected:\n {duplicate_locations}" return geocoded_locations diff --git a/src/dbcp/validation/tests.py b/src/dbcp/validation/tests.py index 7b97838a..20cb81df 100644 --- a/src/dbcp/validation/tests.py +++ b/src/dbcp/validation/tests.py @@ -243,7 +243,7 @@ def test_county_wide_coverage(engine: Engine): df.shape[0] == n_counties ), "counties_wide_format does not contain all counties" notnull = df.notnull() - n_expected_counties = 2458 + n_expected_counties = 2459 assert notnull.any(axis=1).sum() == n_expected_counties, ( "counties_wide_format has unexpected county coverage." 
f" Expected {n_expected_counties}, found {notnull.any(axis=1).sum()}" From a629cdf879ecc62261696eac4e690a4a4c95eb2e Mon Sep 17 00:00:00 2001 From: bendnorman Date: Thu, 14 Nov 2024 17:28:21 -0900 Subject: [PATCH 16/25] Update gs update notebook with new nyiso data --- .../gridstatus/quarterly_update.ipynb | 135 ++++++++++-------- 1 file changed, 72 insertions(+), 63 deletions(-) diff --git a/notebooks/data_updates/gridstatus/quarterly_update.ipynb b/notebooks/data_updates/gridstatus/quarterly_update.ipynb index ef66429a..b970e934 100644 --- a/notebooks/data_updates/gridstatus/quarterly_update.ipynb +++ b/notebooks/data_updates/gridstatus/quarterly_update.ipynb @@ -116,30 +116,19 @@ }, { "cell_type": "code", - "execution_count": 160, + "execution_count": 190, "id": "0a1a398a-c8c2-4b97-924b-71067067cc5d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'miso': '1728242350923420',\n", - " 'miso-pre-2017': '1709776311574737',\n", - " 'caiso': '1728242351254356',\n", - " 'pjm': '1728242351606642',\n", - " 'ercot': '1728242351929200',\n", - " 'spp': '1728242352244156',\n", - " 'nyiso': '1728242352584485',\n", - " 'isone': '1728242352913470'}" - ] - }, - "execution_count": 160, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "updated_iso_queue_version" + "updated_iso_queue_version = {'miso': '1728242350923420',\n", + " 'miso-pre-2017': '1709776311574737',\n", + " 'caiso': '1728242351254356',\n", + " 'pjm': '1728242351606642',\n", + " 'ercot': '1728242351929200',\n", + " 'spp': '1728242352244156',\n", + " 'nyiso': '1731568799445816',\n", + " 'isone': '1728242352913470'}\n" ] }, { @@ -152,7 +141,7 @@ }, { "cell_type": "code", - "execution_count": 162, + "execution_count": 191, "id": "4e9d8f51-ba23-4a1f-8ac7-bd25f5cdd276", "metadata": {}, "outputs": [], @@ -169,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 163, + "execution_count": 192, "id": "24d9143e-ee81-47e9-999a-a6c87b26e080", "metadata": {}, 
"outputs": [], @@ -192,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": 166, + "execution_count": 193, "id": "24357798-b604-4717-8179-9af8eec8a495", "metadata": { "tags": [] @@ -224,7 +213,7 @@ "\n", "nyiso\n", " - Old max date 2024-05-23 00:00:00\n", - " - New max date 2024-08-05 00:00:00\n", + " - New max date 2024-10-29 00:00:00\n", "\n", "isone\n", " - Old max date 2024-06-26 00:00:00\n", @@ -281,7 +270,7 @@ }, { "cell_type": "code", - "execution_count": 175, + "execution_count": 194, "id": "2824fdcf-4ffb-477b-80cf-bbc91255cfc8", "metadata": {}, "outputs": [], @@ -310,7 +299,7 @@ }, { "cell_type": "code", - "execution_count": 177, + "execution_count": 195, "id": "589264d6-4ba4-443c-ae97-88f2a8440d4a", "metadata": {}, "outputs": [], @@ -321,7 +310,7 @@ }, { "cell_type": "code", - "execution_count": 178, + "execution_count": 196, "id": "1e4ad65c-55cc-4200-bd99-80bed7d36295", "metadata": {}, "outputs": [], @@ -338,7 +327,7 @@ }, { "cell_type": "code", - "execution_count": 179, + "execution_count": 197, "id": "b795b90f-a4c4-44ba-9465-e557b623e437", "metadata": {}, "outputs": [ @@ -350,7 +339,7 @@ "ERCOT 2024-09-10 00:00:00\n", "ISONE 2024-08-21 00:00:00\n", "MISO 2024-09-27 04:00:00\n", - "NYISO 2024-08-05 00:00:00\n", + "NYISO 2024-10-15 00:00:00\n", "PJM 2023-07-08 00:00:00\n", "SPP 2024-08-02 00:00:00\n", "Southeast (non-ISO) 2023-12-15 00:00:00\n", @@ -358,7 +347,7 @@ "Name: max_date_entered_queue, dtype: datetime64[ns]" ] }, - "execution_count": 179, + "execution_count": 197, "metadata": {}, "output_type": "execute_result" } @@ -369,7 +358,7 @@ }, { "cell_type": "code", - "execution_count": 180, + "execution_count": 198, "id": "0ec5e6f2-2aba-4d8c-8176-d3a6f818e364", "metadata": {}, "outputs": [ @@ -453,9 +442,9 @@ " 237\n", " 27591.4800\n", " 2024-05-21 00:00:00\n", - " 198\n", - " 24891.9000\n", - " 2024-08-05 00:00:00\n", + " 450\n", + " 75817.1100\n", + " 2024-10-15 00:00:00\n", " \n", " \n", " PJM\n", @@ -504,14 +493,14 @@ "ERCOT 1524 303194.4700 
2024-05-20 00:00:00 1605 322303.2000 2024-09-10 00:00:00\n", "ISONE 529 71354.4190 2024-06-26 00:00:00 503 60720.3512 2024-08-21 00:00:00\n", "MISO 2226 382090.3300 2024-06-07 04:00:00 2186 378040.1000 2024-09-27 04:00:00\n", - "NYISO 237 27591.4800 2024-05-21 00:00:00 198 24891.9000 2024-08-05 00:00:00\n", + "NYISO 237 27591.4800 2024-05-21 00:00:00 450 75817.1100 2024-10-15 00:00:00\n", "PJM 2664 203395.8968 2023-07-08 00:00:00 2569 203615.4318 2023-07-08 00:00:00\n", "SPP 624 123440.9620 2024-05-07 00:00:00 610 121268.0020 2024-08-02 00:00:00\n", "Southeast (non-ISO) 1072 136024.0300 2023-12-15 00:00:00 1072 136024.0300 2023-12-15 00:00:00\n", "West (non-ISO) 2542 488401.0500 2023-12-30 00:00:00 2542 488401.0500 2023-12-30 00:00:00" ] }, - "execution_count": 180, + "execution_count": 198, "metadata": {}, "output_type": "execute_result" } @@ -523,7 +512,7 @@ }, { "cell_type": "code", - "execution_count": 181, + "execution_count": 199, "id": "dd0a7b2f-81f2-4818-88a9-edbf10d77c55", "metadata": {}, "outputs": [], @@ -538,7 +527,7 @@ }, { "cell_type": "code", - "execution_count": 182, + "execution_count": 200, "id": "58310980-9365-4e71-a989-55d1b4625367", "metadata": {}, "outputs": [ @@ -615,8 +604,8 @@ " \n", " \n", " NYISO\n", - " -16.455696\n", - " -9.784107\n", + " 89.873418\n", + " 174.784499\n", " \n", " \n", "\n", @@ -633,10 +622,10 @@ "Southeast (non-ISO) 0.000000 0.000000\n", "SPP -2.243590 -1.760323\n", "ISONE -4.914934 -14.903166\n", - "NYISO -16.455696 -9.784107" + "NYISO 89.873418 174.784499" ] }, - "execution_count": 182, + "execution_count": 200, "metadata": {}, "output_type": "execute_result" } @@ -657,7 +646,7 @@ }, { "cell_type": "code", - "execution_count": 183, + "execution_count": 201, "id": "ce15a3de-05ca-44f6-bc80-478cef9af017", "metadata": {}, "outputs": [ @@ -742,11 +731,11 @@ " 237\n", " 27591.4800\n", " 2024-05-21 00:00:00\n", - " 198\n", - " 24891.9000\n", - " 2024-08-05 00:00:00\n", - " -0.164557\n", - " -0.097841\n", + " 450\n", + " 
75817.1100\n", + " 2024-10-15 00:00:00\n", + " 0.898734\n", + " 1.747845\n", " \n", " \n", " PJM\n", @@ -780,12 +769,12 @@ "ERCOT 1524 303194.4700 2024-05-20 00:00:00 1605 322303.2000 2024-09-10 00:00:00 0.053150 0.063025\n", "ISONE 529 71354.4190 2024-06-26 00:00:00 503 60720.3512 2024-08-21 00:00:00 -0.049149 -0.149032\n", "MISO 2226 382090.3300 2024-06-07 04:00:00 2186 378040.1000 2024-09-27 04:00:00 -0.017969 -0.010600\n", - "NYISO 237 27591.4800 2024-05-21 00:00:00 198 24891.9000 2024-08-05 00:00:00 -0.164557 -0.097841\n", + "NYISO 237 27591.4800 2024-05-21 00:00:00 450 75817.1100 2024-10-15 00:00:00 0.898734 1.747845\n", "PJM 2664 203395.8968 2023-07-08 00:00:00 2569 203615.4318 2023-07-08 00:00:00 -0.035661 0.001079\n", "SPP 624 123440.9620 2024-05-07 00:00:00 610 121268.0020 2024-08-02 00:00:00 -0.022436 -0.017603" ] }, - "execution_count": 183, + "execution_count": 201, "metadata": {}, "output_type": "execute_result" } @@ -810,10 +799,22 @@ }, { "cell_type": "code", - "execution_count": 186, + "execution_count": 202, "id": "9e058ac1-ba06-445f-b7a5-2f47b9e7f738", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "AssertionError", + "evalue": "iso_region\nERCOT 0.063025\nISONE 0.149032\nMISO 0.010600\nNYISO 1.747845\nPJM 0.001079\nSPP 0.017603\nName: total_capacity_mw_pct_diff, dtype: float64 substantial change in an ISO's interconneciton queue active capacity.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[202], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m mw_pct_diff \u001b[38;5;241m=\u001b[39m changed_project_aggs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtotal_capacity_mw_pct_diff\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mabs()\n\u001b[1;32m 2\u001b[0m max_change \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;241m0.2\u001b[39m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m mw_pct_diff\u001b[38;5;241m.\u001b[39mlt(max_change)\u001b[38;5;241m.\u001b[39mall(), \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmw_pct_diff\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m substantial change in an ISO\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms interconneciton queue active capacity.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", + "\u001b[0;31mAssertionError\u001b[0m: iso_region\nERCOT 0.063025\nISONE 0.149032\nMISO 0.010600\nNYISO 1.747845\nPJM 0.001079\nSPP 0.017603\nName: total_capacity_mw_pct_diff, dtype: float64 substantial change in an ISO's interconneciton queue active capacity." + ] + } + ], "source": [ "mw_pct_diff = changed_project_aggs[\"total_capacity_mw_pct_diff\"].abs()\n", "max_change = 0.2\n", @@ -831,7 +832,7 @@ }, { "cell_type": "code", - "execution_count": 187, + "execution_count": 203, "id": "0e0a0cbb-805d-4e8e-a6b2-94b0db2695a3", "metadata": {}, "outputs": [], @@ -842,7 +843,7 @@ }, { "cell_type": "code", - "execution_count": 188, + "execution_count": 204, "id": "1416a051-132f-43e5-8a1f-e0fed1e551bc", "metadata": {}, "outputs": [ @@ -850,16 +851,16 @@ "data": { "text/plain": [ "iso_region\n", - "CAISO 2024-09-30\n", - "ISONE 2024-09-30\n", - "MISO 2024-09-30\n", - "NYISO 2024-09-30\n", + "CAISO 2024-12-31\n", + "ISONE 2024-12-31\n", + "MISO 2024-12-31\n", + "NYISO 2024-12-31\n", "PJM 2027-09-30\n", - "SPP 2024-09-30\n", + "SPP 2024-12-31\n", "Name: report_date, dtype: datetime64[ns]" ] }, - "execution_count": 188, + "execution_count": 204, "metadata": {}, "output_type": "execute_result" } @@ -870,7 +871,7 @@ }, { "cell_type": "code", - "execution_count": 189, + "execution_count": 205, "id": "61987565-1958-4ec0-8717-1a08cbd6e7f2", "metadata": {}, "outputs": [ @@ -906,7 +907,7 @@ }, { "data": { - "image/png": "", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAjkAAAJLCAYAAAAFAYenAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAACYBklEQVR4nOzdeXxM1/sH8M9k32RDtookCEnsgog1JRVBS4vaSqpRpdKW1Fo7bW21VlBboi2l+lVtLSENoYgtBBWxhmhJUCRCZT2/P8j9mWabcCU31+f9es3r25lz5pnnnjn3m8e9d87VCCEEiIiIiFRGr7wTICIiInoRWOQQERGRKrHIISIiIlVikUNERESqxCKHiIiIVIlFDhEREakSixwiIiJSJRY5REREpEoscoiIiEiVWOQQERGRKrHIIaJyExERAY1GAxMTE/z9998F2v38/FCvXj3ExsZCT08P48ePLzTO7NmzodFosG3bNq33PS0rKwuLFi1C48aNYWlpCWtra9StWxdDhgxBYmJigZhnzpzBO++8g1deeQXGxsZwcnJC//79cebMGRm2nIjKAoscIip3mZmZmDVrVpHtvr6++OCDDzBv3rwCRcbVq1cxffp09OrVC126dCkyRo8ePfDpp5+iXr16mDVrFqZNm4a2bdtix44dOHTokFbfzZs3o0mTJoiOjsagQYOwdOlSBAcHY8+ePWjSpAl+/vnn59tgIioTGt6gk4jKS0REBAYNGoRGjRrh7NmzuHz5MpycnKR2Pz8/3L59G3/++SfS0tLg6emJGjVq4I8//oBGowEAvPHGG9i3bx/Onj0LR0fHAu8DgKNHj6J58+b44osv8Nlnn2nlkJubi3v37qFy5coAgEuXLqFBgwaoXr069u3bh6pVq0p9b9++jTZt2uDatWs4deoUatSo8ULHh4ieD4/kEFG5++yzz5Cbm1vs0RwrKyssWrQIBw4cwKpVqwAAP//8M3777TfMmjVLKnAKc+nSJQBAq1atCrTp6+tLBQ4AzJ07Fw8fPsSKFSu0ChwAqFKlCr755hs8ePAAc+bMKdU2ElHZY5FDROXOzc0NAwcOxMqVK3H9+vUi++Wfkho7diwuX76MTz75BC1btsQHH3xQbHwXFxcAwLp165CTk1Ns399++w2urq5o06ZNoe1t27aFq6urdP0PESkXixwiUoQJEyYgJycHs2fPLrZfWFgYsrKy0KxZM6SkpOCbb76RTl0VpUWLFmjXrh1WrlyJatWqoV+/fli6dCmSk5O1+qWlpeH69eto2LBhsfEaNGiAv/76C/fv39dt44ioXLDIISJFqFGjBgYMGIAVK1bgxo0bRfZzcXHBlClTcOfOHYSGhhb4FVVhNBoNdu7cic8//xw2Njb44YcfMHz4cLi4uKB37964d+8eAEhFS6VKlYqNl9+enp6u49YRUXlgkUNEijFx4kTk5OQUe20OADRr1gwA0LRpU51jGxsbY8KECTh79iyuX7+OH374AS1atMCPP/6IkJAQAP9fvJR0hEbXYoiIyheLHCJSjBo1auCdd94p8WjO83J0dESfPn2wb98+uLu748cff0ROTg6srKzg6OiIU6dOFfv+U6dO4ZVXXoGlpeULy5GInh+LHCJSlPyjOSVdmyMHQ0NDNGjQANnZ2bh9+zYAoGvXrkhKSsL+/fsLfc8ff/yBK1euoGvXri88PyJ6PixyiEhRatasiXfeeQfffPMNUlJSZIl54cKFAhcZA8C9e/cQGxsLGxsb6efio0ePhqmpKT744AP8888/Wv3v3LmDoUOHwszMDKNHj5YlNyJ6cQzKOwEiov+aMGECvvvuO5w7dw5169Z97ngnT55Ev379EBgYiDZt2sDW1hZ///031q5di+vXr2PhwoXQ19cHALi7u2Pt2rXo378/6tevj+DgYLi5ueHKlStYvXo1bt++jR9++AE1a9Z87ryI6MVikUNEilOrVi288847WLt2rSzx2rZtixkzZmDHjh2
YP38+bt26hUqVKqFx48aYPXs2evToodW/V69e8PDwwMyZM6XCpnLlynj11Vfx2Wef6fSLLiIqf7ytAxEREakSr8khIiIiVWKRQ0RERKrEIoeIiIhUqVRFTm5uLiZNmgQ3NzeYmpqiZs2amDFjBp6+rEcIgcmTJ8PR0RGmpqbw9/fHhQsXtOLcuXMH/fv3h6WlJaytrREcHIyMjAytPqdOnUKbNm1gYmICZ2fnQu/4u2nTJnh4eMDExAT169fH9u3bS7M5REREpGKlKnJmz56NZcuWYcmSJTh79ixmz56NOXPm4Ouvv5b6zJkzB4sXL8by5ctx+PBhmJubIyAgAI8ePZL69O/fH2fOnEFUVBS2bt2Kffv2YciQIVJ7eno6OnbsCBcXF8TFxWHu3LmYOnUqVqxYIfU5ePAg+vbti+DgYJw4cQLdu3dH9+7d8eeffz7PeBAREZFKlOrXVV27doW9vT1Wr14tvdajRw+Ympri+++/hxACTk5O+PTTTzFq1CgAj+/qa29vj4iICPTp0wdnz56Fl5cXjh49Kt13JjIyEp07d8Zff/0FJycnLFu2DBMmTEBKSgqMjIwAAOPGjcOWLVuQmJgIAOjduzcePHiArVu3Srm0aNECjRo1wvLly59/ZIiIiKhCK9U6OS1btsSKFStw/vx51K5dGydPnsT+/fsxf/58AEBSUhJSUlLg7+8vvcfKygo+Pj6IjY1Fnz59EBsbC2tra60b6/n7+0NPTw+HDx/Gm2++idjYWLRt21YqcAAgICAAs2fPxt27d2FjY4PY2FiEhoZq5RcQEIAtW7YUmX9mZiYyMzOl53l5ebhz5w4qV64MjUZTmqEgIiKiciKEwP379+Hk5AQ9vaJPSpWqyBk3bhzS09Ph4eEBfX195Obm4osvvkD//v0BQFqC3d7eXut99vb2UltKSgrs7Oy0kzAwgK2trVYfNze3AjHy22xsbJCSklLs5xRm5syZmDZtWmk2mYiIiBTq2rVrqFatWpHtpSpyfvzxR6xbtw7r169H3bp1ER8fjxEjRsDJyQlBQUHPneyLNn78eK2jP2lpaahevTquXbvGuwkTERFVEOnp6XB2dkalSpWK7VeqImf06NEYN24c+vTpAwCoX78+rl69ipkzZyIoKAgODg4AgNTUVDg6OkrvS01NRaNGjQAADg4OuHnzplbcnJwc3LlzR3q/g4MDUlNTtfrkPy+pT357YYyNjWFsbFzgdUtLSxY5REREFUxJl5qU6tdVDx8+LHDuS19fH3l5eQAANzc3ODg4IDo6WmpPT0/H4cOH4evrCwDw9fXFvXv3EBcXJ/XZvXs38vLy4OPjI/XZt28fsrOzpT5RUVGoU6cObGxspD5Pf05+n/zPISIiopecKIWgoCDxyiuviK1bt4qkpCSxefNmUaVKFTFmzBipz6xZs4S1tbX45ZdfxKlTp0S3bt2Em5ub+Pfff6U+nTp1Eo0bNxaHDx8W+/fvF+7u7qJv375S+71794S9vb0YMGCA+PPPP8WGDRuEmZmZ+Oabb6Q+Bw4cEAYGBuKrr74SZ8+eFVOmTBGGhobi9OnTOm9PWlqaACDS0tJKMwxERERUjnT9+12qIic9PV188sknonr16sLExETUqFFDTJgwQWRmZkp98vLyxKRJk4S9vb0wNjYWHTp0EOfOndOK888//4i+ffsKCwsLYWlpKQYNGiTu37+v1efkyZOidevWwtjYWLzyyiti1qxZBfL58ccfRe3atYWRkZGoW7eu2LZtW2k2h0UOERFRBaTr3++X+i7k6enpsLKyQlpaWpHX5OTm5mqdNiNSA319fRgYGHDpBCKqkHT5+w2U8sLjl01GRgb++usvvMR1IKmYmZkZHB0dtdajIiJSExY5RcjNzcVff/0FMzMzVK1alf/iJdUQQiArKwu3bt1CUlIS3N3di11Mi4ioomKRU4Ts7GwIIVC1alWYmpqWdzpEsjI1NYWhoSGuXr2KrKwsmJiYlHdKRES
y4z/fSsAjOKRWPHpDRGrH/5cjIiIiVWKRQ0RERKrEa3JKSTOtbE9fiSny/bLrypUrcHNzw4kTJ6TbbJBuNBoNfv75Z3Tv3r28UyEiIh3xSA6RDm7cuIHAwEAAj4tFjUaD+Pj48k2KiIiKxSM5RDoo7savRESkTDySo0J5eXmYM2cOatWqBWNjY1SvXh1ffPFFoX3//PNPBAYGwsLCAvb29hgwYABu374ttUdGRqJ169awtrZG5cqV0bVrV1y6dElqzz+qsXnzZrz66qswMzNDw4YNERsbq3O+Bw4cgJ+fH8zMzGBjY4OAgADcvXu3VJ+/YcMGtGzZEiYmJqhXrx727t0r9cnNzUVwcDDc3NxgamqKOnXqYNGiRQXyWLNmDerWrQtjY2M4OjoiJCREatNoNNiyZQuAxzeiBYDGjRtDo9HAz88P+/btg6GhIVJSUrRijhgxAm3atClxDCIiImBtbY2tW7eiTp06MDMzQ8+ePfHw4UOsXbsWrq6usLGxwccff4zc3FwAwJIlS1CvXj0pxpYtW6DRaLB8+XLpNX9/f0ycOLHEzyciUiMeyVGh8ePHY+XKlViwYAFat26NGzduIDExsUC/e/fuoX379hg8eDAWLFiAf//9F2PHjsXbb7+N3bt3AwAePHiA0NBQNGjQABkZGZg8eTLefPNNxMfHa/0EecKECfjqq6/g7u6OCRMmoG/fvrh48SIMDIqfYvHx8ejQoQPee+89LFq0CAYGBtizZ4/0h1zXzx89ejQWLlwILy8vzJ8/H6+//jqSkpJQuXJl5OXloVq1ati0aRMqV66MgwcPYsiQIXB0dMTbb78NAFi2bBlCQ0Mxa9YsBAYGIi0tDQcOHCg05yNHjqB58+b4/fffUbduXRgZGcHW1hY1atTAd999h9GjRwN4vNbSunXrMGfOHJ2+t4cPH2Lx4sXYsGED7t+/j7feegtvvvkmrK2tsX37dly+fBk9evRAq1at0Lt3b7Rr1w4ff/wxbt26hapVq2Lv3r2oUqUKYmJiMHToUGRnZyM2Nhbjxo3T6fOJiMrDtGnTSuwzZcqUZ4rNIkdl7t+/j0WLFmHJkiUICgoCANSsWROtW7fGlStXtPouWbIEjRs3xpdffim9tmbNGjg7O+P8+fOoXbs2evToofWeNWvWoGrVqkhISNA6ijBq1Ch06dIFwOMJW7duXVy8eBEeHh7F5jtnzhw0bdoUS5culV6rW7eu9N+6fn5ISIjUd9myZYiMjMTq1asxZswYGBoaau1Ebm5uiI2NxY8//igVOZ9//jk+/fRTfPLJJ1K/Zs2aFZpz1apVAQCVK1fWOo0VHByM8PBwqcj57bff8OjRI+kzSpKdnY1ly5ahZs2aAICePXviu+++Q2pqKiwsLODl5YVXX30Ve/bsQe/evVGvXj3Y2tpi79696NmzJ2JiYvDpp59KR6mOHDmC7OxstGzZUqfPJyJSG56uUpmzZ88iMzMTHTp0KLHvyZMnsWfPHlhYWEiP/KIk/5TQhQsX0LdvX9SoUQOWlpZwdXUFACQnJ2vFatCggfTfjo6OAICbN2+WmEP+kZyi6Pr5vr6+0n8bGBigadOmOHv2rPRaWFgYvL29UbVqVVhYWGDFihVSjJs3b+L69es6jVlx3n33XVy8eBGHDh0C8PgU1Ntvvw1zc3Od3m9mZiYVOABgb28PV1dXWFhYaL2WP64ajQZt27ZFTEwM7t27h4SEBHz44YfIzMxEYmIi9u7di2bNmsHMzOy5touIqKLikRyVKc0tKDIyMvD6669j9uzZBdryC5XXX38dLi4uWLlyJZycnJCXl4d69eohKytLq7+hoaH03/mrROfl5T13vrp+fnE2bNiAUaNGYd68efD19UWlSpUwd+5cHD58WKccdGVnZ4fXX38d4eHhcHNzw44dOxATE6Pz+58eQ+DxOBb22tPj6ufnhxUrVuCPP/5A48aNYWlpKRU+e/fuRbt27Z5rm4iIKjIeyVEZd3d3mJq
aIjo6usS+TZo0wZkzZ+Dq6opatWppPczNzfHPP//g3LlzmDhxIjp06ABPT0/pgmC5NGjQoMhcS/P5+UdPACAnJwdxcXHw9PQE8PjC5pYtW+LDDz9E48aNUatWLa2LlytVqgRXV1edxgyAdNfu/OuGnjZ48GBs3LgRK1asQM2aNdGqVSudYj6rdu3aISEhAZs2bYKfnx+Ax4XP77//Ll3QTUT0smKRozImJiYYO3YsxowZg2+//RaXLl3CoUOHsHr16gJ9hw8fjjt37qBv3744evQoLl26hJ07d2LQoEHIzc2FjY0NKleujBUrVuDixYvYvXs3QkNDZc13/PjxOHr0KD788EOcOnUKiYmJWLZsGW7fvl2qzw8LC8PPP/+MxMREDB8+HHfv3sV7770H4HHhd+zYMezcuRPnz5/HpEmTcPToUa33T506FfPmzcPixYtx4cIFHD9+HF9//XWhn2VnZwdTU1NERkYiNTUVaWlpUltAQAAsLS3x+eefY9CgQTKNUtEaNGgAGxsbrF+/XqvI2bJlCzIzM194kUVEpGQ8XVVKcq5A/KJMmjQJBgYGmDx5Mq5fvw5HR0cMHTq0QD8nJyccOHAAY8eORceOHZGZmQkXFxd06tQJenp60k+zP/74Y9SrVw916tTB4sWLZT06ULt2bezatQufffYZmjdvDlNTU/j4+KBv377Q09PT+fNnzZqFWbNmIT4+HrVq1cKvv/6KKlWqAAA++OADnDhxAr1794ZGo0Hfvn3x4YcfYseOHdL7g4KC8OjRIyxYsACjRo1ClSpV0LNnz0JzNjAwwOLFizF9+nRMnjwZbdq0kU5L6enp4d1338WXX36JgQMHyjZORdFoNGjTpg22bduG1q1bA3hc+FhaWqJOnTo6Xw9ERKRGGiGE8v9qvyDp6emwsrJCWloaLC0ttdoePXqEpKQkuLm5wcTEpJwypJIo8VYVwcHBuHXrFn799dfyTqVYnONEpATP8hPy4v5+P41HcohkkpaWhtOnT2P9+vWKL3CIiF4GvCaHXqj81ZQLezy9Po8adOvWDR07dsTQoUPx2muvabW9TONARKQUPJJDL9SqVavw77//Ftpma2v73PFdXV2hlDOuxf1c/EWPAxERFcQih16oV155pbxTUASOAxFR2ePpKiIiIlIlFjlERESkSixyiIiISJVY5BAREZEqscghIiIiVWKRU1oaTdk+SkkIgSFDhsDW1hYajQbx8fHyj8ETGo0GW7ZsAfB45eEX/XllzdXVFQsXLizvNIiI6BnxJ+QqExkZiYiICMTExKBGjRrS/ZtehBs3bsDGxuaFxS9vR48e1br3k0ajwc8//4zu3buXX1JERKQzFjkqc+nSJTg6OqJly5Yv/LMcHBxe+GeUp6pVq5Z3CkRE9Bx4ukpF3n33XXz00UdITk6GRqOBq6srMjMz8fHHH8POzg4mJiZo3bo1jh49Kr3n7t276N+/P6pWrQpTU1O4u7sjPDwcAJCVlYWQkBA4OjrCxMQELi4umDlzpvTep09XldaZM2fQtWtXWFpaolKlSmjTpg0uXboE4PERlNdeew1VqlSBlZUV2rVrh+PHj2u9X6PRYNmyZQgMDISpqSlq1KiBn376SavP2LFjUbt2bZiZmaFGjRqYNGkSsrOztfr89ttvaNasGUxMTFClShW8+eabUtvTp6tcXV0BAG+++aY0tleuXIGenh6OHTumFXPhwoVwcXFBXl5esWMQExMDjUaDnTt3onHjxjA1NUX79u1x8+ZN7NixA56enrC0tES/fv3w8OFDAMDWrVthbW2N3NxcAEB8fDw0Gg3GjRsnxR08eDDeeeedYj+biOhlwCJHRRYtWoTp06ejWrVquHHjBo4ePYoxY8bgf//7H9auXYvjx4+jVq1aCAgIwJ07dwAAkyZNQkJCAnbs2IGzZ89i2bJl0imuxYsX49dff8WPP/6Ic+fOYd26ddIf++fx999/o23btjA2Nsbu3bsRFxeH995
7Dzk5OQCA+/fvIygoCPv378ehQ4fg7u6Ozp074/79+1pxJk2ahB49euDkyZPo378/+vTpg7Nnz0rtlSpVQkREBBISErBo0SKsXLkSCxYskNq3bduGN998E507d8aJEycQHR2N5s2bF5pzfmEYHh4uja2rqyv8/f2lojBfeHg43n33Xejp6bZ7TZ06FUuWLMHBgwdx7do1vP3221i4cCHWr1+Pbdu2YdeuXfj6668BAG3atMH9+/dx4sQJAMDevXtRpUoVrVtK7N27F35+fjp9NhGRmvF0lYpYWVmhUqVK0NfXh4ODAx48eIBly5YhIiICgYGBAICVK1ciKioKq1evxujRo5GcnIzGjRujadOmAKBVxCQnJ8Pd3R2tW7eGRqOBi4uLLHmGhYXBysoKGzZsgKGhIQCgdu3aUnv79u21+q9YsQLW1tbYu3cvunbtKr3eq1cvDB48GAAwY8YMREVF4euvv8bSpUsBABMnTpT6urq6YtSoUdiwYQPGjBkDAPjiiy/Qp08fTJs2TerXsGHDQnPOP3VlbW2tdZpu8ODBGDp0KObPnw9jY2McP34cp0+fxi+//KLzeHz++edo1aoVACA4OBjjx4/HpUuXUKNGDQBAz549sWfPHowdOxZWVlZo1KgRYmJi0LRpU8TExGDkyJGYNm0aMjIykJaWhosXL6Jdu3Y6fz4RkVrxSI6KXbp0CdnZ2dIfUAAwNDRE8+bNpSMew4YNw4YNG9CoUSOMGTMGBw8elPq+++67iI+PR506dfDxxx9j165dsuQVHx+PNm3aSAXOf6WmpuL999+Hu7s7rKysYGlpiYyMDCQnJ2v18/X1LfD86SM5GzduRKtWreDg4AALCwtMnDhRK0Z8fDw6dOjwXNvSvXt36Ovr4+effwYARERE4NVXXy3VEa8GDRpI/21vby+dXnv6tZs3b0rP27Vrh5iYGAgh8Mcff+Ctt96Cp6cn9u/fj71798LJyQnu7u7PtV1ERGrAIuclFxgYiKtXr2LkyJG4fv06OnTogFGjRgEAmjRpgqSkJMyYMQP//vsv3n77bfTs2fO5P9PU1LTY9qCgIMTHx2PRokU4ePAg4uPjUblyZWRlZen8GbGxsejfvz86d+6MrVu34sSJE5gwYYJWjJLy0IWRkREGDhyI8PBwZGVlYf369XjvvfdKFePpYk+j0RQo/jQajdb1PX5+fti/fz9OnjwJQ0NDeHh4wM/PDzExMdi7dy+P4hARPVGqIsfV1RUajabAY/jw4QCAR48eYfjw4ahcuTIsLCzQo0cPpKamasVITk5Gly5dYGZmBjs7O4wePVq6FiNfTEwMmjRpAmNjY9SqVQsREREFcgkLC4OrqytMTEzg4+ODI0eOlHLT1a9mzZowMjLCgQMHpNeys7Nx9OhReHl5Sa9VrVoVQUFB+P7777Fw4UKsWLFCarO0tETv3r2xcuVKbNy4Ef/73/+k63meVYMGDfDHH38UuAg434EDB/Dxxx+jc+fOqFu3LoyNjXH79u0C/Q4dOlTguaenJwDg4MGDcHFxwYQJE9C0aVO4u7vj6tWrBfKIjo7WOW9DQ0Ppgt+nDR48GL///juWLl2KnJwcvPXWWzrHfBb51+UsWLBAKmjyi5yYmBhej0NE9ESpipyjR4/ixo0b0iMqKgrA42sjAGDkyJH47bffsGnTJuzduxfXr1/X+j/83NxcdOnSBVlZWTh48CDWrl2LiIgITJ48WeqTlJSELl264NVXX0V8fDxGjBiBwYMHY+fOnVKfjRs3IjQ0FFOmTMHx48fRsGFDBAQEaB3SJ8Dc3BzDhg3D6NGjERkZiYSEBLz//vt4+PAhgoODAQCTJ0/GL7/8gosXL+LMmTPYunWrVCjMnz8fP/zwAxITE3H+/Hls2rQJDg4OsLa2fq68QkJCkJ6ejj59+uDYsWO4cOECvvvuO5w7dw4A4O7uju+++w5nz57F4cOH0b9//0KPumzatAlr1qzB+fPnMWXKFBw5cgQ
hISFSjOTkZGzYsAGXLl3C4sWLpVNK+aZMmYIffvgBU6ZMwdmzZ3H69GnMnj27yLxdXV0RHR2NlJQU3L17V3rd09MTLVq0wNixY9G3b19ZjhAVx8bGBg0aNMC6deukgqZt27Y4fvw4zp8/zyM5RERPlKrIqVq1KhwcHKTH1q1bUbNmTbRr1w5paWlYvXo15s+fj/bt28Pb2xvh4eE4ePCg9C/uXbt2ISEhAd9//z0aNWqEwMBAzJgxA2FhYdJphOXLl8PNzQ3z5s2Dp6cnQkJC0LNnT61fxcyfPx/vv/8+Bg0aBC8vLyxfvhxmZmZYs2aNjENTBCHK9vGcZs2ahR49emDAgAFo0qQJLl68iJ07d0qL+BkZGWH8+PFo0KAB2rZtC319fWzYsAHA418nzZkzB02bNkWzZs1w5coVbN++XedfDRWlcuXK2L17NzIyMtCuXTt4e3tj5cqV0mma1atX4+7du2jSpAkGDBgg/QT+v6ZNm4YNGzagQYMG+Pbbb/HDDz9IR6jeeOMNjBw5EiEhIWjUqBEOHjyISZMmab3fz88PmzZtwq+//opGjRqhffv2xR4RnDdvHqKiouDs7IzGjRtrtQUHByMrK6vUp6qeVbt27ZCbmysVOba2tvDy8oKDgwPq1KlTJjkQESmdRohn+0ualZUFJycnhIaG4rPPPsPu3bvRoUMH3L17V+tf+i4uLhgxYgRGjhyJyZMn49dff9Va+j8pKQk1atTA8ePH0bhxY7Rt2xZNmjTRWk4/PDwcI0aMQFpaGrKysmBmZoaffvpJa+XZoKAg3Lt3r9hftWRmZiIzM1N6np6eDmdnZ6SlpcHS0lKr76NHj5CUlAQ3NzeYmJg8yxDRC6S01YdnzJiBTZs24dSpU+Wdis44x4lePk//mrQoU6ZMKYNM/t+z5JSeng4rK6tC/34/7Zn/Sb5lyxbcu3cP7777LgAgJSUFRkZGBU5l2NvbIyUlRepjb29foD2/rbg+6enp+Pfff3H79m3k5uYW2ic/RlFmzpwJKysr6eHs7FyqbSb6r4yMDPz5559YsmQJPvroo/JOh4iInvLMRc7q1asRGBgIJycnOfN5ocaPH4+0tDTpce3atfJOSZWGDh0KCwuLQh9Dhw4t7/RkFRISAm9vb/j5+RU4VfUyjQMRkRI902KAV69exe+//47NmzdLrzk4OCArKwv37t3TOpqTmpoqLZ7m4OBQ4JqH/F9fPd3nv7/ISk1NhaWlJUxNTaGvrw99ff1C+5R0LyVjY2MYGxuXbmOp1KZPny79DP2/ijusWBrPeJZVdhEREYX++g8om3EgopeDEk8zVQTPVOSEh4fDzs4OXbp0kV7z9vaGoaEhoqOj0aNHDwDAuXPnkJycLC3a5uvriy+++AI3b96ULiSNioqCpaWldMGor68vtm/frvV5UVFRUgwjIyN4e3sjOjpauh4jLy8P0dHR0i9rqHzZ2dkVeqHwy4bjQERUvkpd5OTl5SE8PBxBQUEwMPj/t1tZWSE4OBihoaGwtbWFpaUlPvroI/j6+qJFixYAgI4dO8LLywsDBgzAnDlzkJKSgokTJ2L48OHSEZahQ4diyZIlGDNmDN577z3s3r0bP/74I7Zt2yZ9VmhoKIKCgtC0aVM0b94cCxcuxIMHDzBo0KDnHQ8iIiJSiVIXOb///juSk5ML/ansggULoKenhx49eiAzMxMBAQHSfYQAQF9fH1u3bsWwYcPg6+sLc3NzBAUFYfr06VIfNzc3bNu2DSNHjsSiRYtQrVo1rFq1CgEBAVKf3r1749atW5g8eTJSUlLQqFEjREZGFrgYmYiIiF5epS5yOnbsWOT1ECYmJggLC0NYWFiR73dxcSlwOuq//Pz8pLssFyUkJISnp4iIiKhIvHcVERERqRKLHCIiIlIlFjkE4PEpwhEjRpR3GooUERHx3PfrIiKisvdMPyF/memyVoGc5F73ICYmBq+++mqB229Q0Xr37o3OnTtLz6dOnYotW7Z
o3Z6EiIiUh0UOvTBZWVkwMjIq7zSem6mp6Qu/szgREcmPp6tUKDMzU7pzt4mJCVq3bo2jR4/iypUrePXVVwEANjY20Gg00r3HgMdrII0ZMwa2trZwcHDA1KlTteLeu3cPgwcPRtWqVWFpaYn27dvj5MmTUvvUqVPRqFEjrFq1SuebPubl5WHOnDmoVasWjI2NUb16dXzxxRdS+9ixY1G7dm2YmZmhRo0amDRpErKzswt85jfffANnZ2eYmZnh7bffRlpamtTn6NGjeO2111ClShVYWVmhXbt2OH78eIFt++CDD2Bvbw8TExPUq1cPW7duBaB9uioiIgLTpk3DyZMnodFooNFoEBERgffeew9du3bVipmdnQ07OzusXr26xHHw8/PDRx99hBEjRsDGxgb29vZYuXKltP5TpUqVUKtWLezYsUN6T9OmTfHVV19Jz7t37w5DQ0NkZGQAAP766y9oNBpcvHixxM8nIlIjFjkqNGbMGPzvf//D2rVrcfz4cdSqVQsBAQGoVKkS/ve//wF4vBr1jRs3sGjRIul9a9euhbm5OQ4fPow5c+Zg+vTpiIqKktp79eqFmzdvYseOHYiLi0OTJk3QoUMH3LlzR+pz8eJF/O9//8PmzZt1Op0zfvx4zJo1C5MmTUJCQgLWr1+vtd5RpUqVEBERgYSEBCxatAgrV67EggULtGJcvHgRP/74I3777TdERkbixIkT+PDDD6X2+/fvIygoCPv378ehQ4fg7u6Ozp074/79+wAeF1qBgYE4cOAAvv/+eyQkJGDWrFnQ19cvkG/v3r3x6aefom7durhx4wZu3LiB3r17Y/DgwYiMjMSNGzekvlu3bsXDhw/Ru3fvEscBeDz+VapUwZEjR/DRRx9h2LBh6NWrF1q2bInjx4+jY8eOGDBgAB4+fAgAaNeuHWJiYgA8vs3FH3/8AWtra+zfvx8AsHfvXrzyyiuoVauWTp9PRKQ2PF2lMg8ePMCyZcsQERGBwMBAAMDKlSsRFRWFNWvWoFmzZgAe33Lgv9fkNGjQQLoGyN3dHUuWLEF0dDRee+017N+/H0eOHMHNmzel1am/+uorbNmyBT/99BOGDBkC4PEpqm+//RZVq1YtMdf79+9j0aJFWLJkCYKCggAANWvWROvWraU+EydOlP7b1dUVo0aNwoYNGzBmzBjp9UePHuHbb7/FK6+8AgD4+uuv0aVLF8ybNw8ODg5o37691ueuWLEC1tbW2Lt3L7p27Yrff/8dR44cwdmzZ1G7dm0AQI0aNQrN2dTUFBYWFjAwMNC6V1rLli1Rp04dfPfdd1Ju4eHh6NWrFywsLEocCwBo2LChtL35xV+VKlXw/vvvAwAmT56MZcuW4dSpU2jRogX8/PywevVq5Obm4s8//4SRkRF69+6NmJgYdOrUCTExMWjXrp1On01EpEY8kqMyly5dQnZ2Nlq1aiW9ZmhoiObNm+Ps2bPFvrdBgwZazx0dHXHz5k0AwMmTJ5GRkYHKlStr3U07KSkJly5dkt7j4uKiU4EDAGfPnkVmZiY6dOhQZJ+NGzeiVatWcHBwgIWFBSZOnIjk5GStPtWrV5cKHODx/c/y8vJw7tw5AI9v3vr+++/D3d0dVlZWsLS0REZGhhQnPj4e1apVkwqcZzV48GCEh4dLn7ljx45CVwYvytPjr6+vj8qVK6N+/frSa/lHuPK/kzZt2uD+/fs4ceIE9u7di3bt2sHPz086urN37174+fk91zYREVVkPJJDEkNDQ63nGo0GeXl5AICMjAw4OjpKf0Cf9vQRIXNzc50/r6SLeWNjY9G/f39MmzYNAQEBsLKywoYNGzBv3jydPwMAgoKC8M8//2DRokVwcXGBsbExfH19kZWVpVMeuho4cCDGjRuH2NhYHDx4EG5ubmjTpo3O7y9s/J9+TaPRAID0nVhbW6Nhw4aIiYlBbGwsXnvtNbRt2xa9e/fG+fPnceHCBR7JIaKXGo/kqEzNmjVhZGSEAwcOSK9
lZ2fj6NGj8PLykn7tlJubW6q4TZo0QUpKCgwMDFCrVi2tR5UqVZ4pV3d3d5iamiI6OrrQ9oMHD8LFxQUTJkxA06ZN4e7ujqtXrxbol5ycjOvXr0vPDx06BD09PdSpUwcAcODAAXz88cfo3Lkz6tatC2NjY9y+fVvq36BBA/z11184f/68TnkbGRkVOn6VK1dG9+7dER4ejoiIiDK5YWy7du2wZ88e7Nu3D35+frC1tYWnpye++OILODo6PvfRKSKiioxFjsqYm5tj2LBhGD16NCIjI5GQkID3338fDx8+RHBwMFxcXKDRaLB161bcunVL+iVOSfz9/eHr64vu3btj165duHLlCg4ePIgJEybg2LFjz5SriYkJxo4dizFjxuDbb7/FpUuXcOjQIenXSO7u7khOTsaGDRtw6dIlLF68GD///HOhcYKCgnDy5En88ccf+Pjjj/H2229L18y4u7vju+++w9mzZ3H48GH0799f6+hNu3bt0LZtW/To0QNRUVFISkrCjh07EBkZWWjerq6uSEpKQnx8PG7fvo3MzEypbfDgwVi7di3Onj0rXWf0Ivn5+WHnzp0wMDCAh4eH9Nq6det4FIeIXno8XVVKci/O9yLMmjULeXl5GDBgAO7fv4+mTZti586dsLGxgY2NDaZNm4Zx48Zh0KBBGDhwICIiIkqMqdFosH37dkyYMAGDBg3CrVu34ODggLZt2z7X3d8nTZoEAwMDTJ48GdevX4ejoyOGDh0KAHjjjTcwcuRIhISEIDMzE126dMGkSZMK/LS9Vq1aeOutt9C5c2fcuXMHXbt2xdKlS6X21atXY8iQIWjSpAmcnZ3x5ZdfYtSoUVox/ve//2HUqFHo27cvHjx4gFq1amHWrFmF5tyjRw9s3rwZr776Ku7du4fw8HDpp/j+/v5wdHRE3bp14eTk9Mzjoqs2bdogLy9Pq6Dx8/PDokWLeD0OEb30NKKoW4q/BNLT02FlZYW0tDRYWlpqtT169AhJSUk6r/dC5UNpqw9nZGTglVdeQXh4ON56663yTqdYnONEFYcuq+3r8o9wueLI6VlyKu7v99N4JIdIBnl5ebh9+zbmzZsHa2trvPHGG+WdEhHRS49FDr0wycnJ8PLyKrI9ISEB1atXL8OMXpzk5GS4ubmhWrVqiIiIgIGBgVbbyzIORERKwiKHXhgnJ6diTyPJcc3K1KlTC1yjUx5cXV1R1JnfshgHIiIqiEUOvTD5Pzd/2XEciIjKB39CXoKX+LpsUjnObSJSOxY5Rci/OWP+qrhEapN/o8//rrRMRKQWPF1VBAMDA5iZmeHWrVswNDSEnh7rQVIHIQQePnyImzdvwtrautC7rRMRqQGLnCJoNBo4OjoiKSmp0FsJEFV01tbWWndSJyJ56bL+C1AxFpmtqFjkFMPIyAju7u48ZUWqY2hoyCM4RKR6LHJKoKenx9VgiYiIKiBeaEJERESqxCKHiIiIVIlFDhEREakSixwiIiJSJRY5REREpEoscoiIiEiVWOQQERGRKrHIISIiIlVikUNERESqxCKHiIiIVIlFDhEREalSqYucv//+G++88w4qV64MU1NT1K9fH8eOHZPahRCYPHkyHB0dYWpqCn9/f1y4cEErxp07d9C/f39YWlrC2toawcHByMjI0Opz6tQptGnTBiYmJnB2dsacOXMK5LJp0yZ4eHjAxMQE9evXx/bt20u7OURERKRSpSpy7t69i1atWsHQ0BA7duxAQkIC5s2bBxsbG6nPnDlzsHjxYixfvhyHDx+Gubk5AgIC8OjRI6lP//79cebMGURFRWHr1q3Yt28fhgwZIrWnp6ejY8eOcHFxQVxcHObOnYupU6dixYoVUp+DBw+ib9++CA4OxokTJ9C9e3d0794df/755/OMBxEREalEqe5CPnv2bDg7OyM8PFx6zc3NTfpvIQQWLlyIiRMnolu3bgCAb7/9Fvb29tiyZQv69OmDs2fPIjIyEkePHkXTpk0BAF9
//TU6d+6Mr776Ck5OTli3bh2ysrKwZs0aGBkZoW7duoiPj8f8+fOlYmjRokXo1KkTRo8eDQCYMWMGoqKisGTJEixfvvz5RoWIiIgqvFIdyfn111/RtGlT9OrVC3Z2dmjcuDFWrlwptSclJSElJQX+/v7Sa1ZWVvDx8UFsbCwAIDY2FtbW1lKBAwD+/v7Q09PD4cOHpT5t27aFkZGR1CcgIADnzp3D3bt3pT5Pf05+n/zPKUxmZibS09O1HkRERKROpTqSc/nyZSxbtgyhoaH47LPPcPToUXz88ccwMjJCUFAQUlJSAAD29vZa77O3t5faUlJSYGdnp52EgQFsbW21+jx9hOjpmCkpKbCxsUFKSkqxn1OYmTNnYtq0aaXZZCIiesno8ndiypQpZZAJPa9SFTl5eXlo2rQpvvzySwBA48aN8eeff2L58uUICgp6IQnKafz48QgNDZWep6enw9nZuRwzIiIiubA4of8q1ekqR0dHeHl5ab3m6emJ5ORkAICDgwMAIDU1VatPamqq1Obg4ICbN29qtefk5ODOnTtafQqL8fRnFNUnv70wxsbGsLS01HoQERGROpWqyGnVqhXOnTun9dr58+fh4uIC4PFFyA4ODoiOjpba09PTcfjwYfj6+gIAfH19ce/ePcTFxUl9du/ejby8PPj4+Eh99u3bh+zsbKlPVFQU6tSpI/2Sy9fXV+tz8vvkfw4RERG93EpV5IwcORKHDh3Cl19+iYsXL2L9+vVYsWIFhg8fDgDQaDQYMWIEPv/8c/z66684ffo0Bg4cCCcnJ3Tv3h3A4yM/nTp1wvvvv48jR47gwIEDCAkJQZ8+feDk5AQA6NevH4yMjBAcHIwzZ85g48aNWLRokdappk8++QSRkZGYN28eEhMTMXXqVBw7dgwhISEyDQ0RERFVZKW6JqdZs2b4+eefMX78eEyfPh1ubm5YuHAh+vfvL/UZM2YMHjx4gCFDhuDevXto3bo1IiMjYWJiIvVZt24dQkJC0KFDB+jp6aFHjx5YvHix1G5lZYVdu3Zh+PDh8Pb2RpUqVTB58mSttXRatmyJ9evXY+LEifjss8/g7u6OLVu2oF69es8zHkRERKQSpSpyAKBr167o2rVrke0ajQbTp0/H9OnTi+xja2uL9evXF/s5DRo0wB9//FFsn169eqFXr17FJ0xEREQvJd67ioiIiFSJRQ4RERGpUqlPVxEREcmFa9uUramYWmKfKVDPePNIDhEREakSj+QQEREp3Mt2BEYuPJJDREREqsQih4iIiFSJRQ4RERGpEoscIiIiUiUWOURERKRKLHKIiIhIlVjkEBERkSqxyCEiIiJVYpFDREREqsQih4iIiFSJRQ4RERGpEoscIiIiUiUWOURERKRKLHKIiIhIlVjkEBERkSqxyCEiIiJVYpFDREREqsQih4iIiFSJRQ4RERGpEoscIiIiUiUWOURERKRKLHKIiIhIlVjkEBERkSoZlHcCREREajQVU3XqNwVTXmwiLzEeySEiIiJVYpFDREREqsQih4iIiFSJRQ4RERGpEoscIiIiUiUWOURERKRK/Ak5ERGV2rRp00rsM2UKfxpN5atUR3KmTp0KjUaj9fDw8JDaHz16hOHDh6Ny5cqwsLBAjx49kJqaqhUjOTkZXbp0gZmZGezs7DB69Gjk5ORo9YmJiUGTJk1gbGyMWrVqISIiokAuYWFhcHV1hYmJCXx8fHDkyJHSbAoRERGpXKmP5NStWxe///77/wcw+P8QI0eOxLZt27Bp0yZYWVkhJCQEb731Fg4cOAAAyM3NRZcuXeDg4ICDBw/ixo0bGDhwIAwNDfHll18CAJKSktClSxcMHToU69atQ3R0NAYPHgxHR0cEBAQAADZu3IjQ0FAsX74cPj4+WLhwIQICAnDu3DnY2dk914AQEVHZmTJ1qg6deERIzXRZNPFZF0ws9TU5BgYGcHBwkB5VqlQBAKSlpWH16tWYP38+2rdvD29vb4SHh+P
gwYM4dOgQAGDXrl1ISEjA999/j0aNGiEwMBAzZsxAWFgYsrKyAADLly+Hm5sb5s2bB09PT4SEhKBnz55YsGCBlMP8+fPx/vvvY9CgQfDy8sLy5cthZmaGNWvWFJt7ZmYm0tPTtR5ERESkTqUuci5cuAAnJyfUqFED/fv3R3JyMgAgLi4O2dnZ8Pf3l/p6eHigevXqiI2NBQDExsaifv36sLe3l/oEBAQgPT0dZ86ckfo8HSO/T36MrKwsxMXFafXR09ODv7+/1KcoM2fOhJWVlfRwdnYu7eYTERFRBVGqIsfHxwcRERGIjIzEsmXLkJSUhDZt2uD+/ftISUmBkZERrK2ttd5jb2+PlJQUAEBKSopWgZPfnt9WXJ/09HT8+++/uH37NnJzcwvtkx+jKOPHj0daWpr0uHbtWmk2n4iIiCqQUl2TExgYKP13gwYN4OPjAxcXF/z4448wNTWVPTm5GRsbw9jYuLzTICIiojLwXOvkWFtbo3bt2rh48SIcHByQlZWFe/fuafVJTU2Fg4MDAMDBwaHAr63yn5fUx9LSEqampqhSpQr09fUL7ZMfg4iIiOi5ipyMjAxcunQJjo6O8Pb2hqGhIaKjo6X2c+fOITk5Gb6+vgAAX19fnD59Gjdv3pT6REVFwdLSEl5eXlKfp2Pk98mPYWRkBG9vb60+eXl5iI6OlvoQERERlep01ahRo/D666/DxcUF169fx5QpU6Cvr4++ffvCysoKwcHBCA0Nha2tLSwtLfHRRx/B19cXLVq0AAB07NgRXl5eGDBgAObMmYOUlBRMnDgRw4cPl04jDR06FEuWLMGYMWPw3nvvYffu3fjxxx+xbds2KY/Q0FAEBQWhadOmaN68ORYuXIgHDx5g0KBBMg4NERFVJPw5Ov1XqYqcv/76C3379sU///yDqlWronXr1jh06BCqVq0KAFiwYAH09PTQo0cPZGZmIiAgAEuXLpXer6+vj61bt2LYsGHw9fWFubk5goKCMH36dKmPm5sbtm3bhpEjR2LRokWoVq0aVq1aJa2RAwC9e/fGrVu3MHnyZKSkpKBRo0aIjIwscDEyERFRab3IdVuobJWqyNmwYUOx7SYmJggLC0NYWFiRfVxcXLB9+/Zi4/j5+eHEiRPF9gkJCUFISEixfYiIiOjlxRt0EhERkSrxBp1ERC8JXW6qCfDGmqQePJJDREREqsQih4iIiFSJp6uIiBROl9NMPMVEVBCLHCIiKjeaqSX3EeUQi9SBp6uIiIhIlVjkEBERkSqxyCEiIiJVYpFDREREqsQih4iIiFSJRQ4RERGpEoscIiIiUiUWOURERKRKLHKIiIhIlVjkEBERkSqxyCEiIiJVYpFDREREqsQih4iIiFSJRQ4RERGpEoscIiIiUiWD8k6AiIgqnilTp+rQacoLz4OoODySQ0RERKrEIoeIiIhUiUUOERERqRKLHCIiIlIlFjlERESkSixyiIiISJVY5BAREZEqcZ0cIqIXZNq0aSX2mcK1ZIheGB7JISIiIlVikUNERESqxCKHiIiIVInX5BARvSR0ut8UwHtOkWqwyCEiUjjeDJPo2TzX6apZs2ZBo9FgxIgR0muPHj3C8OHDUblyZVhYWKBHjx5ITU3Vel9ycjK6dOkCMzMz2NnZYfTo0cjJydHqExMTgyZNmsDY2Bi1atVCREREgc8PCwuDq6srTExM4OPjgyNHjjzP5hARkY40U0t+EJW3Zy5yjh49im+++QYNGjTQen3kyJH47bffsGnTJuzduxfXr1/HW2+9JbXn5uaiS5cuyMrKwsGDB7F27VpERERg8uTJUp+kpCR06dIFr776KuLj4zFixAgMHjwYO3fulPps3LgRoaGhmDJlCo4fP46GDRsiICAAN2/efNZNIiIiIhV5piInIyMD/fv3x8qVK2FjYyO9npaWhtWrV2P+/Plo3749vL29ER4ejoMHD+LQoUMAgF27diEhIQHff/89GjVqhMDAQMyYMQNhYWH
IysoCACxfvhxubm6YN28ePD09ERISgp49e2LBggXSZ82fPx/vv/8+Bg0aBC8vLyxfvhxmZmZYs2bN84wHERERqcQzFTnDhw9Hly5d4O/vr/V6XFwcsrOztV738PBA9erVERsbCwCIjY1F/fr1YW9vL/UJCAhAeno6zpw5I/X5b+yAgAApRlZWFuLi4rT66Onpwd/fX+pTmMzMTKSnp2s9iIiISJ1KfeHxhg0bcPz4cRw9erRAW0pKCoyMjGBtba31ur29PVJSUqQ+Txc4+e35bcX1SU9Px7///ou7d+8iNze30D6JiYlF5j5z5kydViAlopeXrv8fwZWKiZSvVEdyrl27hk8++QTr1q2DiYnJi8rphRk/fjzS0tKkx7Vr18o7JSIiInpBSlXkxMXF4ebNm2jSpAkMDAxgYGCAvXv3YvHixTAwMIC9vT2ysrJw7949rfelpqbCwcEBAODg4FDg11b5z0vqY2lpCVNTU1SpUgX6+vqF9smPURhjY2NYWlpqPYiIiEidSnW6qkOHDjh9+rTWa4MGDYKHhwfGjh0LZ2dnGBoaIjo6Gj169AAAnDt3DsnJyfD19QUA+Pr64osvvsDNmzdhZ2cHAIiKioKlpSW8vLykPtu3b9f6nKioKCmGkZERvL29ER0dje7duwMA8vLyEB0djZCQkFIOARGpAW+GSUT/Vaoip1KlSqhXr57Wa+bm5qhcubL0enBwMEJDQ2FrawtLS0t89NFH8PX1RYsWLQAAHTt2hJeXFwYMGIA5c+YgJSUFEydOxPDhw2FsbAwAGDp0KJYsWYIxY8bgvffew+7du/Hjjz9i27Zt0ueGhoYiKCgITZs2RfPmzbFw4UI8ePAAgwYNeq4BISIiInWQfcXjBQsWQE9PDz169EBmZiYCAgKwdOlSqV1fXx9bt27FsGHD4OvrC3NzcwQFBWH69OlSHzc3N2zbtg0jR47EokWLUK1aNaxatQoBAQFSn969e+PWrVuYPHkyUlJS0KhRI0RGRha4GJmIiIheTs9d5MTExGg9NzExQVhYGMLCwop8j4uLS4HTUf/l5+eHEydOFNsnJCSEp6eIiIioULwLOREREakSixwiIiJSJd6FnIjoJaHrTTPFC82CqOywyCEiUjhdihMWJkQF8XQVERERqRKP5BBRqXHhPSKqCHgkh4iIiFSJRQ4RERGpEoscIiIiUiVek0P0ElHitTRKzImI1IFHcoiIiEiVWOQQERGRKvF0FRHRCzJl6lQdOvFUHNGLwiM5REREpEoscoiIiEiVWOQQERGRKrHIISIiIlVikUNERESqxCKHiIiIVIlFDhEREakSixwiIiJSJRY5REREpEpc8ZiI6Ck6rVIMcKViogqAR3KIiIhIlVjkEBERkSqxyCEiIiJV4jU5RFRqvLs2EVUELHKIiF4QzdSS+4gXngXRy4tFDhGpAo8uEdF/8ZocIiIiUiUeySF6ifBoBxG9TFjkEFG5YuFFRC8KT1cRERGRKrHIISIiIlVikUNERESqVKprcpYtW4Zly5bhypUrAIC6deti8uTJCAwMBAA8evQIn376KTZs2IDMzEwEBARg6dKlsLe3l2IkJydj2LBh2LNnDywsLBAUFISZM2fCwOD/U4mJiUFoaCjOnDkDZ2dnTJw4Ee+++65WLmFhYZg7dy5SUlLQsGFDfP3112jevPkzDgPR/5s2bVqJfabocI2IXHHkjkVE9LIo1ZGcatWqYdasWYiLi8OxY8fQvn17dOvWDWfOnAEAjBw5Er/99hs2bdqEvXv34vr163jrrbek9+fm5qJLly7IysrCwYMHsXbtWkRERGDy5MlSn6SkJHTp0gWvvvoq4uPjMWLECAwePBg7d+6U+mzcuBGhoaGYMmUKjh8/joYNGyIgIAA3b9583vEgIiIilSjVkZzXX39d6/kXX3yBZcuW4dChQ6hWrRpWr16N9evXo3379gCA8PBweHp64tChQ2jRogV27dqFhIQE/P7777C3t0e
jRo0wY8YMjB07FlOnToWRkRGWL18ONzc3zJs3DwDg6emJ/fv3Y8GCBQgICAAAzJ8/H++//z4GDRoEAFi+fDm2bduGNWvWYNy4cc89KET08tJllWKAKxUTVQTPfE1Obm4uNmzYgAcPHsDX1xdxcXHIzs6Gv7+/1MfDwwPVq1dHbGwsACA2Nhb169fXOn0VEBCA9PR06WhQbGysVoz8PvkxsrKyEBcXp9VHT08P/v7+Up+iZGZmIj09XetBRERE6lTqIuf06dOwsLCAsbExhg4dip9//hleXl5ISUmBkZERrK2ttfrb29sjJSUFAJCSkqJV4OS357cV1yc9PR3//vsvbt++jdzc3EL75McoysyZM2FlZSU9nJ2dS7v5REREVEGUejHAOnXqID4+Hmlpafjpp58QFBSEvXv3vojcZDd+/HiEhoZKz9PT01nolDNeUEtERC9KqYscIyMj1KpVCwDg7e2No0ePYtGiRejduzeysrJw7949raM5qampcHBwAAA4ODjgyJEjWvFSU1Oltvz/zX/t6T6WlpYwNTWFvr4+9PX1C+2TH6MoxsbGMDY2Lu0mExERUQX03Ovk5OXlITMzE97e3jA0NER0dLTUdu7cOSQnJ8PX1xcA4Ovri9OnT2v9CioqKgqWlpbw8vKS+jwdI79PfgwjIyN4e3tr9cnLy0N0dLTUh4iIiKhUR3LGjx+PwMBAVK9eHffv38f69esRExODnTt3wsrKCsHBwQgNDYWtrS0sLS3x0UcfwdfXFy1atAAAdOzYEV5eXhgwYADmzJmDlJQUTJw4EcOHD5eOsAwdOhRLlizBmDFj8N5772H37t348ccfsW3bNimP0NBQBAUFoWnTpmjevDkWLlyIBw8eSL+2IqKXjy6/iuIvooheLqUqcm7evImBAwfixo0bsLKyQoMGDbBz50689tprAIAFCxZAT08PPXr00FoMMJ++vj62bt2KYcOGwdfXF+bm5ggKCsL06dOlPm5ubti2bRtGjhyJRYsWoVq1ali1apX083EA6N27N27duoXJkycjJSUFjRo1QmRkZIGLkYmIiOjlVaoiZ/Xq1cW2m5iYICwsDGFhYUX2cXFxwfbt24uN4+fnhxMnThTbJyQkBCEhIcX2ISLl4xEYInpRSn3hMRERCxMiqgh4g04iIiJSJR7JIVXQZb0dgGvuEBG9THgkh4iIiFSJR3KIXiK8loaIXiYscohekClTp+rQiafPiKjiqSiXCLDIIfoPFidEROrAa3KIiIhIlXgkh6gC4NElIqLS45EcIiIiUiUWOURERKRKLHKIiIhIlXhNDpUrXmtCREQvCo/kEBERkSrxSM5LRJfFm8p74SYiIiK58EgOERERqRKP5FQAPAJDRERUejySQ0RERKrEIoeIiIhUiUUOERERqRKvySFV0Gm9HYBr7hARvUR4JIeIiIhUiUdyXiD+KoqIiKj88EgOERERqRKLHCIiIlIlnq4iekE0U0vuI154FkRELy8eySEiIiJV4pEceiZqvqiaR2CIiNSBRQ5RBcDCi4io9Hi6ioiIiFSJR3L+Q82nYXRaFbiCbhsREdF/8UgOERERqRKP5FQAPAJDRERUejySQ0RERKrEIzkvEI/AEBERlZ9SHcmZOXMmmjVrhkqVKsHOzg7du3fHuXPntPo8evQIw4cPR+XKlWFhYYEePXogNTVVq09ycjK6dOkCMzMz2NnZYfTo0cjJydHqExMTgyZNmsDY2Bi1atVCREREgXzCwsLg6uoKExMT+Pj44MiRI6XZHCIiIlKxUh3J2bt3L4YPH45mzZohJycHn332GTp27IiEhASYm5sDAEaOHIlt27Zh06ZNsLKyQkhICN566y0cOHAAAJCbm4suXbrAwcEBBw8exI0bNzBw4EAYGhriyy+/BAAkJSWhS5cuGDp0KNatW4fo6GgMHjwYjo6OCAgIAABs3LgRoaGhWL58OXx8fLBw4UIEBATg3LlzsLOzk3OMiIiI6ClTMVW
nflNQvmcrSlXkREZGaj2PiIiAnZ0d4uLi0LZtW6SlpWH16tVYv3492rdvDwAIDw+Hp6cnDh06hBYtWmDXrl1ISEjA77//Dnt7ezRq1AgzZszA2LFjMXXqVBgZGWH58uVwc3PDvHnzAACenp7Yv38/FixYIBU58+fPx/vvv49BgwYBAJYvX45t27ZhzZo1GDdu3HMPDBEREVVsz3XhcVpaGgDA1tYWABAXF4fs7Gz4+/tLfTw8PFC9enXExsYCAGJjY1G/fn3Y29tLfQICApCeno4zZ85IfZ6Okd8nP0ZWVhbi4uK0+ujp6cHf31/qU5jMzEykp6drPYiIiEidnrnIycvLw4gRI9CqVSvUq1cPAJCSkgIjIyNYW1tr9bW3t0dKSorU5+kCJ789v624Punp6fj3339x+/Zt5ObmFtonP0ZhZs6cCSsrK+nh7Oxc+g0nIiKiCuGZi5zhw4fjzz//xIYNG+TM54UaP3480tLSpMe1a9fKOyUiIiJ6QZ7pJ+QhISHYunUr9u3bh2rVqkmvOzg4ICsrC/fu3dM6mpOamgoHBwepz39/BZX/66un+/z3F1mpqamwtLSEqakp9PX1oa+vX2if/BiFMTY2hrGxcbHbxp99V0y63MAS4E0siYheJqUqcoQQ+Oijj/Dzzz8jJiYGbm5uWu3e3t4wNDREdHQ0evToAQA4d+4ckpOT4evrCwDw9fXFF198gZs3b0q/goqKioKlpSW8vLykPtu3b9eKHRUVJcUwMjKCt7c3oqOj0b17dwCPT59FR0cjJCSklENA5Yl31yYiohelVEXO8OHDsX79evzyyy+oVKmSdP2LlZUVTE1NYWVlheDgYISGhsLW1haWlpb46KOP4OvrixYtWgAAOnbsCC8vLwwYMABz5sxBSkoKJk6ciOHDh0tHWYYOHYolS5ZgzJgxeO+997B79278+OOP2LZtm5RLaGgogoKC0LRpUzRv3hwLFy7EgwcPpF9bERER0cutVEXOsmXLAAB+fn5ar4eHh+Pdd98FACxYsAB6enro0aMHMjMzERAQgKVLl0p99fX1sXXrVgwbNgy+vr4wNzdHUFAQpk+fLvVxc3PDtm3bMHLkSCxatAjVqlXDqlWrpJ+PA0Dv3r1x69YtTJ48GSkpKWjUqBEiIyMLXIxMLwZP6xERkdKV+nRVSUxMTBAWFoawsLAi+7i4uBQ4HfVffn5+OHHiRLF9QkJCeHqKiIiICsUbdBIREZEqscghIiIiVWKRQ0RERKrEIoeIiIhUiUUOERERqRKLHCIiIlIlFjlERESkSixyiIiISJVY5BAREZEqscghIiIiVWKRQ0RERKrEIoeIiIhUiUUOERERqRKLHCIiIlIlFjlERESkSixyiIiISJVY5BAREZEqscghIiIiVWKRQ0RERKrEIoeIiIhUiUUOERERqRKLHCIiIlIlFjlERESkSixyiIiISJVY5BAREZEqscghIiIiVWKRQ0RERKrEIoeIiIhUyaC8E6CKSTO15D7ihWdBRERUNB7JISIiIlXikZwKQK6jJjz6QkRELxMeySEiIiJV4pGcF4hHToiIiMoPj+QQERGRKrHIISIiIlUqdZGzb98+vP7663BycoJGo8GWLVu02oUQmDx5MhwdHWFqagp/f39cuHBBq8+dO3fQv39/WFpawtraGsHBwcjIyNDqc+rUKbRp0wYmJiZwdnbGnDlzCuSyadMmeHh4wMTEBPXr18f27dtLuzlERESkUqUuch48eICGDRsiLCys0PY5c+Zg8eLFWL58OQ4fPgxzc3MEBATg0aNHUp/+/fvjzJkziIqKwtatW7Fv3z4MGTJEak9PT0fHjh3h4uKCuLg4zJ07F1OnTsWKFSukPgcPHkTfvn0RHByMEydOoHv37ujevTv+/PPP0m4SERERqVCpLzwODAxEYGBgoW1CCCxcuBATJ05Et27dAADffvst7O3tsWXLFvTp0wdnz55FZGQkjh49iqZNmwIAvv76a3T
u3BlfffUVnJycsG7dOmRlZWHNmjUwMjJC3bp1ER8fj/nz50vF0KJFi9CpUyeMHj0aADBjxgxERUVhyZIlWL58+TMNBhEREamHrNfkJCUlISUlBf7+/tJrVlZW8PHxQWxsLAAgNjYW1tbWUoEDAP7+/tDT08Phw4elPm3btoWRkZHUJyAgAOfOncPdu3elPk9/Tn6f/M8pTGZmJtLT07UeREREpE6y/oQ8JSUFAGBvb6/1ur29vdSWkpICOzs77SQMDGBra6vVx83NrUCM/DYbGxukpKQU+zmFmTlzJqZNm1bsNvBn30REROrwUv26avz48UhLS5Me165dK++UiIiI6AWRtchxcHAAAKSmpmq9npqaKrU5ODjg5s2bWu05OTm4c+eOVp/CYjz9GUX1yW8vjLGxMSwtLbUeREREpE6yFjlubm5wcHBAdHS09Fp6ejoOHz4MX19fAICvry/u3buHuLg4qc/u3buRl5cHHx8fqc++ffuQnZ0t9YmKikKdOnVgY2Mj9Xn6c/L75H8OERERvdxKXeRkZGQgPj4e8fHxAB5fbBwfH4/k5GRoNBqMGDECn3/+OX799VecPn0aAwcOhJOTE7p37w4A8PT0RKdOnfD+++/jyJEjOHDgAEJCQtCnTx84OTkBAPr16wcjIyMEBwfjzJkz2LhxIxYtWoTQ0FApj08++QSRkZGYN28eEhMTMXXqVBw7dgwhISHPPypERERU4ZX6wuNjx47h1VdflZ7nFx5BQUGIiIjAmDFj8ODBAwwZMgT37t1D69atERkZCRMTE+k969atQ0hICDp06AA9PT306NEDixcvltqtrKywa9cuDB8+HN7e3qhSpQomT56stZZOy5YtsX79ekycOBGfffYZ3N3dsWXLFtSrV++ZBoKIiIjUpdRFjp+fH4Qo+vdFGo0G06dPx/Tp04vsY2tri/Xr1xf7OQ0aNMAff/xRbJ9evXqhV69exSdMREREL6WX6tdVRERE9PJgkUNERESqxCKHiIiIVIlFDhEREakSixwiIiJSJRY5REREpEoscoiIiEiVWOQQERGRKrHIISIiIlVikUNERESqxCKHiIiIVIlFDhEREakSixwiIiJSJRY5REREpEoscoiIiEiVWOQQERGRKrHIISIiIlVikUNERESqxCKHiIiIVIlFDhEREakSixwiIiJSJRY5REREpEoscoiIiEiVWOQQERGRKrHIISIiIlVikUNERESqxCKHiIiIVIlFDhEREakSixwiIiJSJRY5REREpEoscoiIiEiVWOQQERGRKrHIISIiIlVikUNERESqxCKHiIiIVKnCFzlhYWFwdXWFiYkJfHx8cOTIkfJOiYiIiBSgQhc5GzduRGhoKKZMmYLjx4+jYcOGCAgIwM2bN8s7NSIiIipnFbrImT9/Pt5//30MGjQIXl5eWL58OczMzLBmzZryTo2IiIjKmUF5J/CssrKyEBcXh/Hjx0uv6enpwd/fH7GxsYW+JzMzE5mZmdLztLQ0AEB6evr/d3pU8mdr9S+OXLGUFkfOWGUYR85YHG+Z4sgZi3NJljhyxuJcKrmLnLGUFkfOWP+Nk/9cCFH8G0UF9ffffwsA4uDBg1qvjx49WjRv3rzQ90yZMkUA4IMPPvjggw8+VPC4du1asbVChT2S8yzGjx+P0NBQ6XleXh7u3LmDypUrQ6PRFPqe9PR0ODs749q1a7C0tHzmz5YrjhJz4rZVzJzUvG1KzInbxpyUFkeJOekaRwiB+/fvw8nJqdh4FbbIqVKlCvT19ZGamqr1empqKhwcHAp9j7GxMYyNjbVes7a21unzLC0tn3syyRlHzlhKiyNnLKXFkTOW0uLIGUvNOXHbyjaWmnN62bfNysqqxDgV9sJjIyMjeHt7Izo6WnotLy8P0dHR8PX1LcfMiIiISAkq7JEcAAgNDUVQUBCaNm2K5s2bY+HChXjw4AEGDRpU3qkRERFROavQRU7v3r1x69YtTJ48GSkpKWjUqBE
iIyNhb28v22cYGxtjypQpBU5zlVccJebEbauYOal525SYE7eNOSktjhJzknPbAEAjREm/vyIiIiKqeCrsNTlERERExWGRQ0RERKrEIoeIiIhUiUUOERERqRKLHCIiIlKlCv0T8hchJycHZ86cQUpKCgDAwcEBXl5eMDQ0LJc4csZKSUnB4cOHteL4+PgUuUL0i85HzlhKG6On5d8U9nl/Eqm0OGlpaVrjpMvqoy8yjtJyknM/yae0OaC0uSRnTnLGkiOOnGMkV05KjFOAPLfLrPhyc3PFhAkThLW1tdBoNFoPa2trMXHiRJGbm1tmceSMlZGRIfr37y/09fWFgYGBsLOzE3Z2dsLAwEDo6+uLd955Rzx48KBCbpvSxijfrl27RGBgoLC2thZ6enpCT09PWFtbi8DAQBEVFVVh4wghxMqVK4Wnp6cUJ//h6ekpVq1aVeZxlJaTnPuJEMqbA0qcS3LmpLRxknM/Udq2yfm9FYVFzhOjR48WVatWFcuXLxdJSUni4cOH4uHDhyIpKUl88803ws7OTowZM6bM4sgZKzg4WLi7u4vIyEiRk5MjvZ6TkyN27twpateuLQYPHlwht01pYySEEBEREcLAwED06dNHhIeHi+3bt4vt27eL8PBw0bdvX2FoaCi+/fbbChdHCCHmzJkjzMzMxLhx48SePXtEQkKCSEhIEHv27BHjx48X5ubmYu7cuWUWR4k5ybmfKG0OKHEuyZmT0sZJzv1Eadsm5/dWHBY5T9jb24vIyMgi2yMjI4WdnV2ZxZEzlrW1tThw4ECR7fv37xfW1tZllo+csZQ2RkII4e7uLpYsWVJke1hYmKhVq1aFiyOEENWrVxcbN24ssn3Dhg3C2dm5zOIoMSc59xOlzQElziU5c1LaOMm5nyht2+T83orDIucJMzMzcerUqSLbT548KczNzcssjpyxLC0txdGjR4tsP3LkiLC0tCyzfOSMpbQxEkIIY2NjkZiYWGR7YmKiMDExqXBxhBDCxMREJCQkFNl+5swZYWpqWmZxlJiTnPuJ0uaAEueSnDkpbZzk3E+Utm1yfm/F4a+rnvDz88OoUaNw+/btAm23b9/G2LFj4efnV2Zx5IzVtWtXDBkyBCdOnCjQduLECQwbNgyvv/56meUjZyyljREA1K1bF6tXry6yfc2aNfDy8qpwcQCgWbNmmDVrFnJycgq05ebmYvbs2WjWrFmZxVFiTnLuJ0qbA0qcS3LmpLRxknM/Udq2yfm9FYf3rnri2rVr6Ny5MxITE1G/fn3pJp+pqak4ffo0vLy8sHXrVjg7O5dJHDlj3b17F/369cPOnTthY2MDOzs7AMDNmzdx7949BAQEYP369bC2tq5w26a0MQKAmJgYdO3aFTVq1IC/v79WTtHR0bh8+TK2bduGtm3bVqg4AHDq1CkEBAQgOzsbbdu21Yq1b98+GBkZYdeuXahXr16ZxFFiTnLuJ0qbA0qcS3LmpLRxknM/Udq2yfm9FYdFzlPy8vKwc+dOHDp0SOuner6+vujYsSP09HQ78CVXHLljnT17ttA4Hh4e5ZKPEsdbjjECgCtXrmDZsmWFxho6dChcXV0rZBwAuH//Pr7//vtCY/Xr1w+WlpZlGkeJOck5J5U2B5Q4l+TMSWnjJOd+orRtk/N7KwqLHCIiIlIlLgb4H0eOHEFsbKxWVdmyZUudz3vKHUeuWFlZWdiyZUuhcbp16wYjI6MyzUfuWEobI6DgYnCOjo7w9PR87oUOyzsOUHDRREdHRzRv3rzUiybKFUeJOcm5nyhtDihxLsmZk9LGSc79RGnbJuf3VqjnvnRZJVJTU0Xr1q2FRqMRLi4uonnz5qJ58+bCxcVFaDQa0bp1a5GamlpmceSMdeHCBVGjRg1hYmIi2rVrJ95++23x9ttvi3bt2gkTExNRq1YtceHChQq5bUobIyGUt9ChnIvTybVoopyLLyotJzn3E6XNASXOJS5Sqtt+orRtk3vRzKKwyHmiR48
ewtfXt9CftCUmJoqWLVuKnj17llkcOWP5+/uLbt26ibS0tAJtaWlpolu3bqJjx45llo+csZQ2RkIob6FDORenk2vRRDkXX1RaTnLuJ0qbA0qcS1ykVLf9RGnbJuf3VhwWOU9YWFiI48ePF9l+7NgxYWFhUWZx5IxlamoqTp8+XWT7qVOndFprQYnbprQxEkJ5Cx3KuTidXIsmyrn4otJyknM/UdocUOJc4iKluu0nSts2Ob+34nCdnCeMjY2Rnp5eZPv9+/d1unGYXHHkjGVtbY0rV64U2X7lyhWdfhqtxG1T2hjlf6aTk1OR7Y6Ojnjw4EGFiwM8/tVQcdcmGRkZIS8vr8ziKDEnOfcTpc0BJc4lOXNS2jjJuZ8obdvk/N6K9dxlkkp8+OGHwsXFRWzevFnrlEVaWprYvHmzcHV1FSEhIWUWR85YkyZNEjY2NmL+/Pni5MmTIiUlRaSkpIiTJ0+K+fPnC1tbWzFlypQKuW1KGyMhhOjcubPo2LGjuHXrVoG2W7duiU6dOokuXbpUuDhCCNGvXz/RuHHjQo9UHD9+XHh7e4v+/fuXWRwl5iTnfqK0OaDEuSRnTkobJzn3E6Vtm5zfW3FY5Dzx6NEjMXToUGFkZCT09PSEiYmJMDExEXp6esLIyEgMGzZMPHr0qMziyB1r1qxZwtHRUWg0GulurxqNRjg6OorZs2eXeT5KHG85xkgIIZKTk0W9evWEgYGBaNy4sejUqZPo1KmTaNy4sTAwMBANGjQQycnJFS6OEELcuXNHdOrUSWg0GmFrays8PDyEh4eHsLW1FXp6eiIwMFDcvXu3zOIoMSc556TS5oAS55KcOSltnOTcT5S2bXJ+b8XhOjn/kZ6ejri4OK2ffXp7e5dqwSU548gdKykpSSuOm5tbueajxPGWY4yUttChnIvTAUBiYmKBn0c/y6KJcsVRYk5yzUmlzQElziW1L1Iq15xU2rbJPZcKwyKHiIiIVIkXHhciOTkZN27c0Hrtxo0bSE5OLpc4csbat28fjh07pvXasWPHsG/fvnLJR85YShsjIjn3EyJ6Bs99wkuFNBqN8PT01HrNw8ND6OnplUscJebEbdONq6ur8Pf313qtQ4cOws3NrULHEeLxOHl5eWm99qzjLUccJebEuaQbucZbzpyUNk5y7idK2zY5v7f/4m0dCrFnzx6YmZlpvfbtt9/i4cOH5RJHzlhJSUkFlsuOjo5GdnZ2ueQjZyyljREABAUFoWrVqlqvvfnmm7h9+3aFjgMAa9asKfCz+pkzZyItLa1c4igxJzn3E6XNASXOJTlzUto4ybmfKG3b5Pze/ovX5BAREZEq8UhOIdLS0rSu9LaysirXOHLF+u+N0BwcHODl5fVMN0JT2rbJFUfOMXoZZGZmAoDOi9u96DhKy0nO/UTt5JwDasUxegbPfcJLRVauXCk8PT2lNVLyH56enmLVqlVlHkeuWHLeCE1p2yZXHLlvFnfmzBkxbNgw0ahRI+Hg4CAcHBxEo0aNxLBhw8SZM2cqbBwhhNi1a5cIDAwU1tbW0lhbW1uLwMBAERUVVeZxlJiTnPuJ0uaAEueSnDkpbZzk3E+Utm1yfm9F4ZGcJ+bOnYupU6fi448/RkBAAOzt7QEAqamp2LVrFz755BPcvXsXo0aNKpM4csYaN24cIiIiMGvWrELjTJo0CVlZWZg9e3aF2zaljREA7NixA927d0eTJk3QrVs3rVhRUVFo0qQJfvnlFwQEBFSoOACwdu1aDB48GD179sSCBQsKjFPnzp2xevVqDBgwoEziKDEnOfcTpc0BJc4lOXNS2jjJuZ8obdvk/N6KJUuppALVq1cXGzduLLJ9w4YNwtnZucziyBlLrhuhKXHblDZGQgjRoEEDMWnSpCLbp0yZIurXr1/h4gghhLu7u1iyZEmR7WFhYaJWrVplFkeJOcm5nyhtDihxLsmZk9LGSc79RGnbJuf3VhwWOU+YmJiIhISEItvPnDmj012o5Yo
jZywzMzNx6tSpIttPnjwpzM3NyywfOWMpbYzyc0pMTCyyPTExUZiYmFS4OEIIYWxsLEssueIoMSe59xMlzQElziU5c1LaOMm5nyht2+T83orDxQCfaNasGWbNmoWcnJwCbbm5uZg9ezaaNWtWZnHkjOXn54dRo0YV+nO827dvY+zYsfDz8yuzfOSMpbQxAgBXV1ds27atyPZt27bBxcWlwsUBgLp162L16tVFtq9ZswZeXl5lFkeJOcm5nyhtDihxLsmZk9LGSc79RGnbJuf3Vhxek/PEkiVLEBAQAAcHB7Rt21br/OC+fftgZGSEXbt2lVkcOWMtX74cnTt3hqOjI+rXr68V5/Tp0/Dy8sLWrVsr5LYpbYwAYPr06ejXrx9iYmLg7++vFSs6OhqRkZFYv359hYsDAPPmzUPXrl0RGRlZaKzLly8X+39ccsdRYk5y7idKmwNKnEty5qS0cZJzP1Hatsn5vRWH6+Q85f79+/j+++8LvVlYv379dL6xnlxx5Iwl143QlLhtShsjADh48CAWL15c6E31PvnkE/j6+lbIOABw5coVLFu2rNBxGjp0KFxdXcs0jhJzknM/UdocUOJckjMnpY2TnPuJ0rZNzu+tKCxyiIiISJV4uuo/UlJScPjwYamqdHR0RPPmzeHg4FAuceSMdeTIkQIVc8uWLXW+PkDufOSMpbQxyqekhQ7ljPPfRRMdHR3h6elZ6kUT5YqjxJzk3E8A5c0Bpc0lOXOSM9aLWKT0ecZIrpyUGKdQz33pskpkZGSI/v37C319fWFgYCDs7OyEnZ2dMDAwEPr6+uKdd94RDx48KLM4csZKTU0VrVu3FhqNRri4uIjmzZuL5s2bCxcXF6HRaETr1q1Fampqhdw2pY1Rvv8uBqfRaGRZ6LC848i1aKKciy8qLSc59xMhlDcHlDaX5MxJzlhyxJF7kVIlbZuccYrDIueJ4OBg4e7uLiIjI0VOTo70ek5Ojti5c6eoXbu2GDx4cJnFkTNWjx49hK+vb6E/10tMTBQtW7YUPXv2rJDbprQxEkKIOXPmCDMzMzFu3DixZ88ekZCQIBISEsSePXvE+PHjhbm5uZg7d26FiyOEEKNHjxZVq1YVy5cvF0lJSeLhw4fi4cOHIikpSXzzzTfCzs5OjBkzpsziKDEnOfcTpc0BJc4lOXNS2jjJuZ8obdvk/N6KwyLnCWtra3HgwIEi2/fv3y+sra3LLI6csSwsLMTx48eLbD927JiwsLAos3zkjKW0MRJCeQsdyrk4nVyLJsq5+KLScpJzP1HaHFDiXOIipbrtJ0rbNjm/t+JwnZwn8vLyYGRkVGS7kZER8vLyyiyOnLGMjY2Rnp5eZPv9+/d1uuGbErdNaWMEADdv3kT9+vWLbK9fv36h6/EoPQ7weBycnJyKbHd0dMSDBw/KLI4Sc5JzP1HaHFDiXJIzJ6WNk5z7idK2Tc7vrVjPXSapRL9+/UTjxo0L/df88ePHhbe3t+jfv3+ZxZEz1ocffihcXFzE5s2bRVpamvR6Wlqa2Lx5s3B1dRUhISFllo+csZQ2RkII0aZNGzFw4ECRnZ1doC0nJ0cMHDhQtG3btsLFEUKIzp07i44dO4pbt24VaLt165bo1KmT6NKlS5nFUWJOcu4nSpsDSpxLcuaktHGScz9R2rbJ+b0Vhz8hf+Lu3bvo168fdu7cCRsbG9jZ2QF4XG3eu3cPAQEBWL9+PaytrcskjpyxMjMzMWLECKxZswY5OTnSvzKzsrJgYGCA4OBgLFiwoMQjFUrcNqWNEQCcOnUKAQEByM7OLnYxuHr16lWoOABw7do1dO7cGYmJicUumujs7FwmcZSYk5z7idLmgBLnkpw5KW2c5NxPlLZtcn5vxWGR8x+JiYmFLkzk4eFRLnHkjJWeno64uDitON7e3qVamEzOfOSMpbQxUtpCh3IuTifXoolyLr6oxJzkmpNKmwNKnEtcpLRibpuc31tRWOQQERGRKvHC45dMcnI
ybty4ofXajRs3kJycXE4ZKQ/HiIhIHVjkFEJPTw9169bVes3T0xP6+vrlEkfOWK6urujQoYPWa+3bt4ebm1u55CNnLKWNEQC8+uqrePfdd7VeCwoKQvv27St0HABwc3PDa6+9pvWav78/atSoUS5xlJiTnPuJ0uaAEueSnDkpbZzk3E+Utm1yfm//xds6FGLNmjUFLgqcOXMm0tLSyiWOnLH27NkDMzMzrde+/fZbPHz4sFzykTOW0sYIeFwwOTo6ar32yiuvlOo8uhLjAI//T6hq1apar7355pul/tmnXHGUmJOc+4nS5oAS55KcOSltnOTcT5S2bXJ+b//Fa3KIiIhIlXgkpwiZmZkAoPMCcC86jlyx5LwRmtK2Ta44L/RmcfTSkXM/IaLSYZHzlKioKCxYsACxsbHS6reWlpbw9fVFaGgo/P39yzSOnLFWrVqF+fPn49y5c1qv16lTB59++imCg4Mr7LYpbYwA4Pbt21izZk2hdzR/9913Cxx2rihxACAhIQFLliwp9OfRISEh8PLyKtM4SsxJzv1EaXNAiXNJzpyUNk5y7idK2zY5v7ei8HTVE2vXrsXgwYPRs2dPBAQEaC1MtGvXLvz0009YvXo1BgwYUCZx5Iw1d+5cTJ06FR9//HGhcRYvXoypU6di1KhRFW7blDZGAHD06FEEBATAzMwM/v7+WrGio6Px8OFD7Ny5E02bNq1QcQBgx44d6N69O5o0aVJgnKKiohAXF4dffvkFAQEBZRJHiTnJuZ8obQ4ocS7JmZPSxknO/URp2ybn91as514zWSXc3d3FkiVLimwPCwsTtWrVKrM4csaS60ZoStw2pY2REEL4+PiIIUOGiLy8vAJteXl5YsiQIaJFixYVLo4QQjRo0EBMmjSpyPYpU6aI+vXrl1kcJeYk536itDmgxLkkZ05KGyc59xOlbZuc31txWOQ8YWxsLBITE4tsT0xMFCYmJmUWR85YJiYmIiEhocj2M2fOCFNT0zLLR85YShuj/Fhnz54tsv3s2bM656SkOPmx5BpvueaS0nKScz9R2hxQ6lySMycljZPc+4nStk2u7604XCfnibp162L16tVFtq9Zs0anc59yxZEzVrNmzTBr1izk5OQUaMvNzcXs2bPRrFmzMstHzlhKGyPg8TnlI0eOFNl+5MgR6dBsRYoDPP6p57Zt24ps37ZtG1xcXMosjhJzknM/UdocUOJckjMnpY2TnPuJ0rZNzu+tOLzw+Il58+aha9euiIyMLPT84OXLl4udbHLHkTPWkiVLEBAQAAcHh2JvhFYRt01pYwQAo0aNwpAhQxAXF4cOHToUyGnlypX46quvKlwcAJg+fTr69euHmJiYQsc7MjIS69evL7M4SsxJzv1EaXNAiXNJzpyUNk5y7idK2zY5v7diPfexIBVJSkoSY8aMEW3bthW1a9cWtWvXFm3bthVjx44VSUlJZR5Hzljp6eli6dKlYuDAgaJjx46iY8eOYuDAgWLZsmUiLS2tQm+b0sZIiMfX8Pj4+AgDAwOh0WiERqMRBgYGwsfHp9hrf5QeRwghDhw4IHr37i2qV68ujIyMhJGRkahevbro3bu3OHjwYJnHUWJOcu4nSpsDSpxLcuaktHGScz9R2rbJ+b0Vhb+uInqBsrOzpRVJq1SpAkNDQ1XEobKntDmgxLkkZ04cp4oXpzAscv4jJycHZ86ckX6z7+joCE9Pz1IPulxx5IyVkpKCw4cPa8Vp3rw5HBwcyiUfOWMpbYyepqSFDuWMI9eiiXIuvqiknOTcT/IpbQ4obS7JmZOcsZS4SKmStk3OOAXIcjxIBXJzc8WECROEtbW1dNgs/2FtbS0mTpwocnNzyyyOnLEyMjJE//79hb6+vjAwMBB2dnbCzs5OGBgYCH19ffHOO++IBw8eVMhtU9oY5du1a5cIDAwU1tbWQk9PT+jp6Qlra2sRGBgooqKiKmwcIYRYuXKl8PT0lOJoNBqhp6cnPD09xapVq8o
8jtJyknM/EUJ5c0CJc0nOnJQ2TnLuJ0rbNjm/t6KwyHli9OjRomrVqmL58uUiKSlJPHz4UDx8+FAkJSWJb775RtjZ2YkxY8aUWRw5YwUHBwt3d3cRGRkpcnJypNdzcnLEzp07Re3atcXgwYMr5LYpbYyEECIiIkIYGBiIPn36iPDwcLF9+3axfft2ER4eLvr27SsMDQ3Ft99+W+HiCCHEnDlzhJmZmRg3bpzYs2ePSEhIEAkJCWLPnj1i/PjxwtzcXMydO7fM4igxJzn3E6XNASXOJTlzUto4ybmfKG3b5PzeisMi5wl7e3sRGRlZZHtkZKSws7MrszhyxrK2thYHDhwosn3//v3C2tq6zPKRM5bSxkgI5S10KOfidHItmijn4otKy0nO/URpc0CJc4mLlOq2nyht2+T83orDIucJMzMzcerUqSLbT548KczNzcssjpyxLC0txdGjR4tsP3LkiLC0tCyzfOSMpbQxEkJ5Cx3KvTidHIsmyr34opJyknM/UdocUOJc4iKluu0nSts2Ob+34nAxwCf8/PwwatQo6Qrvp92+fRtjx46Fn59fmcWRM1bXrl0xZMgQnDhxokDbiRMnMGzYMLz++utllo+csZQ2RoDyFjqUc3E6uRZNlHPxRaXlJOd+orQ5oMS5xEVKddtPlLZtcn5vxeGvq564du0aOnfujMTERNSvX19rYaLTp0/Dy8sLW7duhbOzc5nEkTPW3bt30a9fP+zcuRM2Njaws7MDANy8eRP37t1DQEAA1q9fD2tr6wq3bUobIwCIiYlB165dUaNGjWIXg2vbtm2FigMAp06dQkBAALKzs4tdNLFevXplEkeJOcm5nyhtDihxLsmZk9LGSc79RGnbJuf3VhwWOU/Jy8vDzp07cejQoQK3tO/YsSP09HQ78CVXHLljJSYmFrilva+vLzw8PMolHyWOtxxjBABXrlzBsmXLCs1p6NChcHV1rZBxAOD+/fv4/vvvC43Vr18/WFpalmkcJeYk55xU2hxQ4lySMyeljZOc+4nStk3O760oLHKIiIhIlXhNDhEREakSixwiIiJSJRY5REREpEoscoiIiEiVDMo7ATVT4o0HlUhJN1Wkik/xNwwkojLDIzk6Onv2LGrUqKFT31WrVsHLywu2trbw8vLS+u/iFj960bGKcvLkSejr6z93nNKMESDftlWkMQIer6Py3nvv6dQ3ISEBH374IRo3bgxHR0c4OjqicePG+PDDD5GQkKDzZ8oVpySXLl1C+/btyzxOVFQUOnfuDBsbG5iZmcHMzAw2Njbo3Lkzfv/99zKPU5zS7ifF4VwqWWnGCKhY4yTXGAHqnUs8kqOjrKwsXL16tcR+c+fOxdSpU/Hxxx8jICBAa4GjXbt24ZNPPsHdu3cxatSoMo1VEjlWEtB1jAD5tq2ijREA3LlzB2vXrsWaNWuK7bdjxw50794dTZo0Qbdu3bS2LSoqCk2aNMEvv/yCgICAMomji4yMDOzdu7dM46xduxaDBw9Gz549sWDBggJzoHPnzli9ejUGDBhQJnFKUpr9pCScSyXTdYyAijdOco0RoN65xHVynggNDS22/datW1i/fj1yc3OL7efi4oK5c+fi7bffLrR948aNGD16NJKTk0vMSa5Yb731VrHtaWlpiImJKXHb5BojQL5tU9oYAcCvv/5abPvly5fx6aeflhirYcOG6NatG6ZPn15o+9SpU7F582acOnWqTOIAwOLFi4tt//vvv/HVV1+VuG1yxQGA2rVr45NPPsHw4cMLbV+6dCkWLFiACxculEkcOfcTzqWS54BcYwQob5zk3E/UPJeKwyLnCX19fTRq1KjI1SMzMjJw/PjxEieAqakpjh8/Dk9Pz0LbExIS0LRpUzx8+LDEnOSKZWhoiNdee02qlP/rzp072Lp1a4nbJtcYAfJtm9LGCAD09PSg0WiKPfKj0Wh0mkvx8fGoU6dOoe3nzp1Do0aN8O+//5ZJHODxtjk6OsLIyKj
Q9qysLKSkpJS4bXLFAQATExOcPHnyubdPrjhy7iecS7rNJTnGCFDeOMm5n6h5LhXruW/xqRK1a9cW3333XZHtJ06cEHp6eiXGadOmjRg4cKDIzs4u0JaTkyMGDhwo2rZtq1NOcsWqX7++WLVqVZHtum6bXGMkhHzbprQxEkIIJycnsWXLlueO5eHhIebNm1dk+7x580SdOnXKLI4QQri6uoqNGzcW2a7rtskVRwghmjRpIkaPHl1k+5gxY0STJk3KLI6c+wnnUsnbJtcYCaG8cZJzP1HzXCoOr8l5omnTpoiLi8M777xTaHtJFXC+JUuWICAgAA4ODsXeUE0XcsXy9vbG8ePHERwcXGi7sbExqlevXmIcucYIkG/blDZG+bHi4uLQrVu3Qtt1Hafp06ejX79+iImJKfQGdpGRkVi/fn2ZxXl624o6PajrtskVBwDmzZuHrl27IjIystgb/ZVVHDn3E84l3efS844RoLxxknM/UfNcKg5PVz2RkpKCzMxMuLi4PHcspd14MDMzE7m5uTAzM3u2DXpCzjEClHVTRbnGCAD++OMPPHjwAJ06dSq0/cGDBzh27BjatWtXYqyDBw9i8eLFhd409JNPPoGvr69OOckVJyEhAQ8fPkTTpk0Lbc/Ozsb169dLnCNyxcmnpBsGyrmfcC6VPAfkHCNAWeMk536i5rlUHBY5REREpEpcJ6cEqampOv0S6r/+e/HW4cOHsW/fPmRnZ5dbLDlzetqzjpGcOSktzoskhNDpQsOyiiNnLDlzep55qeQ4clLi9yZnLLkobZw4RqULSEKI9PR00b9/f1G9enUxcOBAkZmZKT788EOh0WiEnp6eaNu2rUhLSysxzvXr10WrVq2Evr6+aNu2rbhz547o0qWL0Gg0QqPRiNq1a4vr16/rlJNcseSKI9cYKXHb5PzehBBi5cqVYuDAgWLNmjVCCCE2bNggPDw8hJubm5g8ebJOMbKzs8WECRNE27ZtpffMmTNHmJmZCSMjI+k7KKs4Ss1JrnmptDj5OJdKJscYyZmT0uLkU+tcKg6LnCdCQkKEh4eHWLx4sfDz8xPdunUT9erVE/v37xd79+4VXl5e4rPPPisxzoABA0TLli3Fr7/+Knr37i1atmwp2rRpI/766y9x9epV0apVKzF8+HCdcpIrllxx5BojJW6bnN/bggULhLm5uXjrrbeEo6Oj+Pzzz0XlypXF559/LqZNmyYsLS3FN998U2KciRMnCnt7exEaGiq8vLzE0KFDhbOzs/j+++/F2rVrxSuvvCJmz55dZnGUmpNc81JpcYTgXNIlllxjJGdOSosjhLrnUnFY5Dzh7Owsdu/eLYQQ4u+//xYajUb89ttvUvvWrVt1+jmbo6OjiI2NFUII8c8//wiNRiN+//13qT06OlrUqFFDp5zkiiVXHLnGSInbJuf35uHhIdatWyeEEOL48ePCwMBA6+fpq1atEt7e3iXGqVGjhjS+Fy5cEHp6emLDhg1S+8aNG0W9evXKLI5Sc5JrXiotjhCcS7rEkmuM5MxJaXGEUPdcKg6LnCeMjY1FcnKy9NzMzEycO3dOen7lyhVhZmZWYhwTExOtOObm5uLChQvS86tXrwpTU1OdcpIrllxx5BojOXNSWhwhhDA1NRVXr16VnhsbG4s///xTen7hwgVhbW1d6pxMTEzE2bNnpeeXL18WlSpVKrM4Ss1JrnmptDhCcC7pEkuuMZIzJ6XFEULdc6k4vPD4icqVK+PWrVvS827dusHa2lp6npGRodPdiO3s7HDjxg3peUhICGxtbaXnd+/ehbm5uU45yRVLrjhyjZGcOSktDgCYmZnhwYMH0vOqVavCwsJCq09OTk6JcaysrHDv3j3peZMmTVCpUiXpeWZmJjQaTZnFUWpOcs1LpcUBOJd0iSXXGMmZk9LiAOqeS8VhkfNEgwYNcPToUen5+vXrYWdnJz0/evRokbcOeFqjRo0QGxsrPZ81a5bWH8v9+/ejQYM
GOuUkVyy54sg1RnLmpLQ4AODh4aF1v5Vr165prWORmJio03orXl5eOH78uPT8wIEDeOWVV6Tnp0+fhru7e5nFUWpOcs1LpcUBOJd0iSXXGMmZk9LiAOqeS8XhOjlP3LlzB3p6elr/4nrajh07YGpqCj8/v+f6nCNHjsDMzAz16tV7rjhyxtI1TlmNUWlyUmKcAwcOwNzcHI0aNSq0fenSpcjLy0NISEixcc6fPw9DQ0O4ubkV2r5+/XoYGBgUuRqq3HGUmpNc81JpcQDOJV1iyTVGcuaktDiAuudScVjkEBERkSrxdFUx6tevj2vXrj13HEtLS1y+fFmGjOSLJVccucYIUN62yfm9ffjhh7h9+/Zzx+nSpYvWtUPlHUfOWHLmJNe8VFocgHNJF3KNEaC8cZJzvNU8l/KxyCnGlStXZFnpVs6DZXLFkiuOXGMEKG/b5Pzevv/+e6Snpz93nH379uHff/9VTBw5Y8mZk1zzUmlxAM4lXcg1RoDyxknO8VbzXMrHIoeoDPCsMMmFc6lkHCPdvAzjxCKnGG3atIGpqelzx3nnnXdKdefxsoglVxy5xghQ3rbJ+b3JxcXFBYaGhoqJI2csOXOSa14qLY6clPi9yRlLLkobJ45R6fDCYyIiIlIlg/JOQEn++ecfnDp1Cg0bNoStrS1u376N1atXIzMzE7169dJpbYu//voLJiYmqFKlCgDgjz/+wPLly5GcnAwXFxcMHz4cvr6+Ouf077//4ocffsD+/ftx48YN6OnpoUaNGujevTs6dOhQqjhxcXGwtbWFl5eXVtujR4/w448/YuDAgSXGkWOMAHnHSWljlC83Nxf6+vrS88OHDyMzMxO+vr7P9K+Ve/fuYdOmTdIY9erVC1ZWVjq///LlywXG6LXXXnumo1V5eXnQ0yt4IDgvLw9//fUXqlevXmIMIQSuXLkCZ2dnGBgYICsrCz///DMyMzPRuXNnaW6UJDMzE3p6etKYXrp0CWvWrJHGKTg4uMifqRZm9+7dBcbpjTfeKPWaHX/99Resra0LLLiWnZ2N2NhYtG3bVudYnEslk3uMAOWMk1xjBKh7LhXquddMVonDhw8LKysrodFohI2NjTh27Jhwc3MT7u7uombNmsLU1FTExcWVGKd58+bS/Ti2bNki9PT0xBtvvCHGjh0r3nzzTWFoaKh1H5viXLhwQbi4uAg7Ozvh7OwsNBqN6NKli/Dx8RH6+vqiV69eIjs7u8Q4586dEy4uLlp3Qf7777+l9pSUFKGnp1diHLnGSAj5xklpYySEfHc0f/PNN8WmTZuEEEL8+eefokqVKqJq1arCx8dH2NvbCwcHB5GQkFBinIyMDNGzZ0/p8/X09ISDg4PQ19cXFhYWYsmSJTptlxBCpKWliV69egkTExNhZ2cnJk2aJHJycqR2XccpMTFRuLi4CD09PVGrVi1x+fJl4e3tLczNzYWZmZmoUqWKOH/+vE45tWvXThqn/fv3C2NjY9GgQQPRu3dv0bhxY2FmZiYOHjxYYpzU1FTRvHlzoaenJwwMDISenp7w9vaWxmr06NE65XP9+nXRrFkzoaenJ/T19cWAAQPE/fv3pXbOpcfkmktyjZEQyhsnucZICHXPpeKwyHnC399fDB48WKSnp4u5c+eKatWqicGDB0vtgwYNEt27dy8xjrm5ubh8+bIQQggfHx8xa9Ysrfavv/5aNG7cWKecAgMDxQcffCDy8vKEEELMmjVLBAYGCiGEOH/+vHB1dRVTpkwpMU737t1Fly5dxK1bt8SFCxdEly5dhJubm3QfE113FLnGSAj5xklpYySEfHc0t7Gxke7lEhgYKPr16ycyMzOFEEJkZWWJ4OBg0bFjxxLjDBkyRLRq1UqcPn1aXLhwQfTs2VOMGTNGPHjwQKxevVqYmZlJN+4ryccffyxq164tNm3aJFauXClcXFxEly5dpLxSUlKERqMpMU63bt3EG2+8IU6dOiVGjBghPD09Rbdu3URWVpZ
49OiReP3118U777yjU06WlpZSQdSuXTsxcuRIrfaJEyeKVq1alRind+/eonv37iItLU08evRIhISEiIEDBwohHt+gtXLlymLhwoUlxhk4cKDw8fERR48eFVFRUcLb21s0bdpU3LlzRwih+xgJwbmkyzjJNUZCKG+c5BojIdQ9l4rDIucJGxsbqfrMysoSenp64vDhw1J7XFyceOWVV0qMY2VlJU6ePCmEEMLOzk7673wXL17U+eZ8ZmZmWv+azczMFIaGhuL27dtCiMdHQFxdXUuMY2dnJ06dOiU9z8vLE0OHDhXVq1cXly5d0vkPuFxjJIR846S0MRJCvjuam5qaiosXL0oxjx8/rtV+7tw5YWVlVWKcKlWqiGPHjknP79y5I0xMTMSDBw+EEEIsWbJENGrUqMQ4QghRvXp1sWfPHun5rVu3RPPmzUXHjh3Fo0ePdB6nqlWrihMnTgghHv+LTqPRiD/++ENqP3DggKhevbpOOZmbm0v/p2tvby/i4+O12i9evCgsLCxKjGNpaal1w8KMjAxhaGgo0tLShBBCfPfddzrdPdzJyUlrv8gv2ho1aiT++ecfzqUn5JpLco2REMobJ7nGKH971DqXisNfVz2RlZUl/frB0NAQZmZmWtcEVKlSBf/880+Jcdq1a4cffvgBANC4cWPExMRote/Zs0fr/hzFsba2xv3796XnDx8+RE5ODoyMjAA8vkeOLgsn/fvvvzAw+P/LrzQaDZYtW4bXX38d7dq1w/nz53XKR64xAuQbJ6WNEfD4Zp75udva2sLMzEzrHjG1atXSKacGDRpg9+7dAAAHBwdcvXpVq/3q1as6/WInJydH6/y2hYUFcnJypJv1dezYEYmJiSVvGIBbt25pbUuVKlXw+++/4/79++jcuTMePnyoU5yMjAzp3mDm5uYwNzeHo6Oj1O7s7IzU1FSdYvn4+OC3334DANSsWRMnT57Uao+Pj9e6D1lRjI2NtW4IqKenh9zcXOmmhS1btsSVK1dKjJOWlgYbGxutuJs3b4arqyteffVV3Lx5U5fNAsC5pAu5xghQ3jjJNUaAuudSsZ67TFIJDw8PER0dLT3funWrePjwofT80KFDolq1aiXGSUhIEJUrVxYDBw4UM2bMEBYWFuKdd94RX3zxhRg4cKAwNjYW4eHhOuUUFBQk2rVrJ86ePSsuX74sXWOQLyYmRjg7O5cYp1mzZuLbb78ttG348OHC2tpap38NyDVGQsg3TkobIyEe/+vr6X/Jjx07Vvzzzz/S8/j4eFGlSpUS42zdulXY2tqK8PBwER4eLlxdXcWqVavEgQMHxJo1a4Szs7NO14m89tprWoeh586dKxwdHaXnx48f1ykfIYSoU6eO2LZtW4HX79+/L3x9fUXDhg11GqeaNWtqHblZunSpSE9Pl57HxcUJBwcHnXI6ePCgsLKyElOmTBFff/21qFKlipg4caJYt26dmDx5srC2thazZ88uMc6bb74pevToITIyMkRWVpYYMWKEqFWrltR+6NAhnXKqX7+++Omnnwq8np2dLbp37y6qV6/OuSTkm0tyjZEQyhsnucZICHXPpeKwyHli6tSp4ocffiiy/bPPPhNvvfWWTrEuXrwo+vTpIypVqiRdVGVoaChatmwpfv75Z51zSk1NFS1atJAuynJxcdE6NLhp0yaxePHiEuN8+eWX0nUqhRk2bJhO53XlHCMh5BknpY2REEK88cYbxV67sWTJEtG+fXudYv3000+iWrVqQk9PTxojjUYjTExMxIgRI7QuQixKXFycsLW1FQ4ODqJ69erCyMhI63tcsmSJdO1JST766CPRs2fPQtvS09OFj4+PTv+n+8EHH4iVK1cW2T5z5kzRuXNnnXIS4nGhkz8Pnn688sorOl1HI4QQly5dEjVr1hQGBgbC0NBQWFtbi6ioKKk9PDxcjBs3rsQ4Y8aMKfKahOzsbPHGG2/o/IeJc6nkcZJzjIRQ1jjJNUZCqHsuFYfr5Oj
o4cOH0NfXh7Gxsc7vEULg5s2byMvLQ5UqVZ75Z4wXLlxAZmYmPDw8tE6pKM2zjBEgzzhVlDECSn9n9NzcXMTFxSEpKQl5eXlwdHSEt7c3KlWqpPNn3rhxA1u3bkVmZibat29f4Cfyurp79y6uX7+OunXrFtp+//59HD9+HO3atXum+PmSkpJgYmKidQpLF7du3cLly5elcXJ1dS3V+x8+fIj9+/cjKysLLVq00Pln7E/LycnBw4cPi/wJbE5ODv7++2+tUwXPinOpZKUdI0A541RWYwRU7LlUHBY5REREpErK/idvGUtISMCSJUsQGxuLlJQUAI8vrPL19UVISIjOVaZcceSOVZRLly7h/fffly4mK6t8lDjeRSnNGAGPL9LesmVLgZxatmyJbt26SRdGV7Q4JUlNTcU333yDyZMnl9j39u3bWLNmTaE5vfvuu6hatarOn1tULF9fXwwaNEjnWHLmVJRr165hypQpWLNmjU79lTYHlDiX5MypIo1TacZIzpyUFqc4PJLzxI4dO9C9e3c0adIEAQEBsLe3B/B4EkVFRSEuLg6//PILAgICyiSO3LGKc/LkSTRp0gS5ubkVbtuUNkYAcPHiRQQEBOD69evw8fHRyunw4cOoVq0aduzYgVq1alWoOLrQdZyOHj2KgIAAmJmZwd/fXyun6OhoPHz4EDt37kTTpk1L/Ey5YsmZU3E4l3Sj6zjJmVNFGyfOpZKxyHmiYcOG6NatG6ZPn15o+9SpU7F582acOnWqTOLIGWvx4sXFtv/999/46quvStxRlLhtShsjAHjttddgbm6Ob7/9tsB1Genp6Rg4cCD+/fdf7Ny5s0LFAVDiOCYmJqJv374ljlOLFi3QsGFDLF++XOtn28Dja7SGDh2KU6dOITY2tsSc5IolV5xff/212PbLly/j008/5VySaS7JmZPSxkmuMZIzJ6XFKdFzX7qsEiYmJiIxMbHI9sTERGFiYlJmceSMpdFohJOTk3B1dS304eTkpNMV+krcNqWNkRCPF8s6ffp0ke2nTp0SpqamFS6OEEL6Fdt/f8X09Ou6zqX8BfwKc/bs2VLNJTliyRWnuDF6eqx0obQ5oMS5JGdOShsnucZIzpyUFqckXAzwCVdXV2zbtq3I9m3btun0awi54sgZy8XFBQsWLEBSUlKhj+I+40XkI2cspY0R8HiBwuIWjbty5Qqsra0rXBzg8SJiK1euLHSMLl++jK1bt+oUx8HBAUeOHCmy/ciRI9Lh67KKJVccR0dHbN68GXl5eYU+jh8/XmKMfEqbA0qcS3LmpLRxkmuM5MxJaXFKwguPn5g+fTr69euHmJiYQs/HR0ZGYv369WUWR85Y3t7eiIuLw9tvv11ou0ajgdDhrKUSt01pYwQAgwcPxsCBAzFp0iR06NChQE6ff/45PvroowoXB3g8TtevXy+ycLx3755O4zRq1CgMGTIEcXFxhea0cuVKfPXVVzrlJFcsueLkz6Vu3boV2s659Jhcc0nOnJQ2TnKNkRK3Tc7vrVjPfSxIRQ4cOCB69+4tLUxkZGQkqlevLnr37q3TXYzljiNXrDNnzoijR48W2Z6VlSWuXLlSZvnIHUtpYyTE4xuFOjo6SoeT8w8tOzo66rT6rlLjbN68WXz33XdFtt+5c0dEREToFGvDhg3Cx8dHGBgYSIfgDQwMhI+Pj9i4caPOOckZS444+/btEzt27CiyPSMjQ8TExOick9LmgBLnklw5yRlLjjhyjpFcOSkxTnF44THRC5SUlKT100g3NzdVxJFTdnY2bt++DQDPtWimnLHkzEkuSpsDSpxLcubEcap4cQrDa3KKMWvWLNy7d08xceSMpbQ4csZSUhw3Nzf4+voiLy8PTk5OqonztAMHDiAzM/OZ329oaAhHR0fExMQgKyvruXKRK5acOQHADz/8IN148FkpbQ4ocS7JmZNSx+l5x0jOnJQWp1CyHA9SqUqVKolLly4pJo6csZQWR85YSosjZyy
lxZEzlppz4raVbSw158RtKx0eySmGkOlMnlxx5IyltDhyxlJaHDljKS2OnLHUnBO3rWxjqTknblvpsMghIiIidZL1uJDKJCcni9zcXMXEkTOWnHFycnKeO46csZQWRwgh1q1bJzIyMlQXR85Yf/zxh3j06JEMGckXS2lxhFDeHFDiXFJzTty20uGvq/4jNzcX+vr60vMjR44gLy8PjRs3hrGxcZnHUWpOycnJuHHjBvT09FCjRg1Urly5VO9/EbGUFidf/kWCpR1jpceROxYRlb2YmBj4+PjA1NRUVXEkspZMFdiVK1eEt7e30NfXF506dRJpaWnC399fWiujRo0a4ty5c2UWR6k5hYWFierVq0trGuQ/WrVqJY4dO6ZTDLljKS2OEELs2rVLBAYGCmtraymOtbW1CAwMFFFRURU2jtyxipKQkCDc3NwUFau84sTHx4sZM2aIsLAwcevWLa22tLQ0MWjQoAoZR85YK1euFAMHDhRr1qwRQjxe78jDw0O4ubmJyZMn65yPnLGUFqcohoaGIiEhQXVx8rHIeaJHjx6iXbt24rfffhNvv/22aNWqlfDz8xN//fWXuH79uggICBDdu3cvszhKzGnu3LnCyclJfP3112LlypXC09NTTJ8+XezYsUMMGDBAmJmZFbug3ouIpbQ4QggREREhDAwMRJ8+fUR4eLjYvn272L59uwgPDxd9+/YVhoaG4ttvv61wceSOVZz4+Hid78lTVrHKI87OnTuFkZGRqFu3rqhevbqoXLmy2L17t9SekpKiUyylxZEz1oIFC4S5ubl46623hKOjo/j8889F5cqVxeeffy6mTZsmLC0txTfffKNTTnLFUlocIYRo3LhxoQ+NRiM8PT2l5xUtTklY5DxRtWpVceLECSGEEPfu3RMajUb88ccfUntcXJywt7cvszhKzMnV1VVs375den7u3DlRuXJlkZ2dLYQQ4uOPPxavvfaaLpsmWyylxRFCCHd3d7FkyZIi28PCwkStWrUqXBw5Y40cObLYxzvvvKPzH0u5YiktjhBC+Pr6is8++0wIIUReXp6YPXu2sLCwkFZU1rUQUFocOWN5eHiIdevWCSGEOH78uDAwMBCrVq2S2letWiW8vb11ykmuWEqLI4QQBgYGolOnTmLq1KnSY8qUKUJPT098+OGH0msVLU5JWOQ8UalSJXH58mUhhBC5ubnCwMBAxMfHS+0XLlwQlSpVKrM4SszJzMxMJCUlSc/z8vKEgYGBuH79uhDi8b9QLSwsdNo2uWIpLY4QQhgbG8tyZ3SlxZEzlp6enmjSpInw8/Mr9NG0aVOd/1jKFUtpcYQQwtLSUly8eFHrtXXr1glzc3Px22+/6VwIKC2OnLFMTU3F1atXpefGxsbizz//lJ5fuHBBWFtb65STXLGUFkcIIfbv3y9q1qwpJk+erPWjEwMDA3HmzBmdYigxTklY5DzRokULMXHiRCGEEGvWrBH29vZi3LhxUvv06dN1qpjliqPEnBo1aiRWrFghPY+OjhZmZmYiLy9PCPH4D5yuBZxcsZQWRwghmjRpIkaPHl1k+5gxY0STJk0qXBw5Y9WuXbvYe/KcOHFC5z+WcsVSWhwhHh+FLex6sB9++EGYmZmJZcuW6RRLaXHkjFW5cmWtaziqVaumdZ+5Cxcu6PwPFLliKS1Ovnv37ok+ffoIHx8fqcB8lqJCaXGKwyLnicjISGFiYiKMjIyEiYmJ2Lt3r6hdu7Zo3ry5aNGihdDX19fpBn1yxVFiThs3bhSGhobi7bffFgMHDhQWFhZaxdLy5cuFr6+vTtsmVyylxRFCiD179ghzc3NRv359MXLkSDFr1iwxa9YsMXLkSNGgQQNhYWEh9u7dW+HiyBmrX79+YsSIEUW2x8fHC41Go1NOcsVSWhwhhHjttdfE3LlzC21bv369MDQ01KkQUFocOWO1atVKbNiwocj23377TdSrV0+nnOSKpbQ4/7VmzRrh4OAgvvnmG2FoaPj
MRYXS4hSGRc5TkpKSxE8//SSdtkhJSRGTJk0Sn376qdYFcWUVR4k5bd++XfTr10/06NFD68iHEELcvn1b3L59u8xjKS2OEI/He8yYMaJt27aidu3aonbt2qJt27Zi7NixWqfFKlocuWLduHGjVHd1L4tYSosjxOO7UBdXMK1bt074+flVuDhyxtq/f790zWFhwsLCxNdff61TTnLFUlqcwpw/f140a9ZMaDSa5yoqlBbnv7hODhER0UsoLy8P9+/fh6WlJTQajWriPI23ddBRTk4OkpOTFRNHzlhKiyNnLKXFISJSCj09PVhZWT13QaG0OFoxZYukcmfOnIGbm5ti4sgZS2lx5IxVXnGWLl0Kf39/vP3224iOjtZqu337NmrUqFEh4zAnbpuat02JOXHbng+LHCKZLV68GKNHj4aHhweMjY3RuXNnzJw5U2rPzc3F1atXK1wc5sRtU/O2KTEnbptu21Ys2a7uqeCKWn0x/+Hh4aHTlf5yxVFiTtw23bbNy8tLWsBLCCEOHDggqlatKiZNmiSE0H2RM6XFYU7cNjVvmxJz4rY9/wrjBs9fJqlDQkIC+vTpU+QpiRs3buD8+fNlFkeJOXHbdNu2pKQktGzZUnresmVL7N69G/7+/sjOzsaIESMqZBzmVLZxlJiTmrdNiTlx22Tw3GWSSnh7e4ulS5cW2a7rAl5yxVFiTtw23bbN2dlZ7Nu37//au9eQpv4/DuDvYza32szK7pH2wGSjoqXNZqOiB5U/jIWF0c1uWOkkI4SgKIRAii50ISKIbhLdSKjA8kEU1ajM0mnNVklhRRlmVJLrsn3/T2T/lrrffm3N0+n9giDP9+x73u89kA9nc+tw/OHDh2LQoEEiJycnqL3ktg8zsZuSu8kxE7uFfieH78lpN2nSJLhcri7XdTodJk+eHLF95JiJ3YLrZrFYUFZW1uG4wWDAlStXcOnSpT9yH2ZiNyV3k2MmdguDkMckIvLjcDjE4cOHu1yvq6sL6ovn5LYPM7GbkrvJMRO7hf4FnfwwQCIiIlIkvvH4J5WVlbh16xbevHkDABg8eDDMZjNMJlO37CPHTOz2Z2ZScjc5ZmI3ZpLbPnLMFM5unQr5XpBCNDU1CYvFIiRJEgkJCcJkMgmTySQSEhKEJEnCYrGIpqamiO0jx0zsFny3SZMmhSWTnPZhJnZTcjc5ZmK34LoFwiGn3Zw5c4TZbBaPHj3qsPbo0SORnp4u5s6dG7F95JiJ3diNmdhNqd3kmIndgusWCIecdlqtVty/f7/L9aqqKqHVaiO2jxwzsRu7MRO7KbWbHDOxW3DdAuGfkLeLiYnBx48fu1z/9OkTYmJiIraPHDOxG7sxE7sptZscM7FbcN0CCnlMUoj8/HyRkJAgysrKxIcPH3zHP3z4IMrKykRiYqIoKCiI2D5yzMRu7MZM7KbUbnLMxG7BdQuEQ047t9stVq9eLVQqlYiKihJqtVqo1WoRFRUlVCqVyMvLE263O2L7yDETu7EbM7GbUrvJMRO7BdctEH5Ozk8+fvyIe/fu+f05W0pKCmJjY7tlHzlmYrc/M5OSu8kxE7sxk9z2kWOmcHbrDIccIiIiUiS+8fgHbW1tuHnzJpxOZ4c1t9uN48ePR3QfOWZit+DILZOSu8kxE7sFh5kit48cM4WzW5dCfsFLIVwul+9DiKKiosTkyZPFq1evfOtv3rwJ6htRw7WPHDOxG7sxE7sptZscM7Ebv4U8bNavX4/Ro0fj7du3cLlc0Ol0sFgsaGxs7JZ95JiJ3f7MTEruJsdM7MZMcttHjpnC2S2gkMckhRg4cKCora31/ez1esXq1avFiBEjRENDQ9BTZbj2kWMmdmM3ZmI3pXaTYyZ2C/1ODoecdjqdTjidzg7HbTabGD58uLh+/XpQT3i49pFjJnZjN2ZiN6V2k2MmduOQEzYTJkwQx48f73TNZrOJuLi4oJ7wcO0jx0zsxm7MxG5K7SbHTOzGISdsSkpKREZGRpf
reXl5QpKkiO0jx0zsxm7MxG5K7SbHTOwWXLdA+Dk5REREpEj86yoiIiJSJA45REREpEgccoiIiEiROOQQERGRInHIISIiIkXikENE9IPnz59DkiTU1NR0dxQiChGHHCJSjKVLl2L27Nl/zXWJKDAOOUQUNl+/fu2W63o8Hni93m65NhHJF4ccIvplU6dORUFBAdauXYv4+HjMmDEDDx48QEZGBrRaLQYNGoTFixejubm5w2MKCgrQp08fxMfHY9OmTfjxc0nfv3+PnJwc9O3bF7169UJGRgaePHniWz969Cji4uJw4cIFGAwGxMTEYPny5Th27BjOnz8PSZIgSRKuXbv2rx0qKythNBqhVquRmpqK6upqv3WPx4MVK1Zg5MiR0Gg0SE5Oxp49e3zrxcXFXV73xYsXyM7ORlxcHPr16wer1Yrnz5//2pNNRP8ZhxwiCsmxY8egUqlgt9uxdetWTJs2DUajEVVVVbh8+TKampqQnZ3d4THR0dGorKzEnj17sGvXLhw6dMi3vnTpUlRVVeHChQu4desWhBD4559/8O3bN985nz9/xrZt23Do0CE8fPgQe/fuRXZ2NmbOnInXr1/j9evXSE9PD5i9tbUVmZmZMBgMuHfvHoqLi1FUVOR3jtfrxfDhw3H27Fk4nU5s3rwZGzZswJkzZwAARUVFnV7327dvmDFjBnQ6HW7cuAG73Q6tVouZM2d22x0vor9OyF8MQUR/rSlTpgij0ej7ecuWLWL69Ol+57x48UIAEC6Xy/cYvV4vvF6v75z169cLvV4vhBDi8ePHAoCw2+2+9ebmZqHRaMSZM2eEEEIcOXJEABA1NTV+11qyZImwWq1B5z948KDo37+/aGtr8x07cOCAACCqq6u7fJzNZhNz5swJeN3S0lKRnJzs1/PLly9Co9GIioqKoDMS0a+L7t4Ri4j+dCkpKb7/OxwOXL16FVqttsN5DQ0NGDVqFABg4sSJkCTJt2Y2m7Fz5054PB7U19cjOjoaaWlpvvX+/fsjOTkZ9fX1vmMqlQpjx44NKXt9fT3Gjh0LtVrtl+Vn+/fvx+HDh9HY2Ii2tjZ8/foV48aNC7i3w+HA06dPodPp/I673W40NDSElJuIgsMhh4hC0rt3b9//W1tbMWvWLGzbtq3DeUOGDAnrdTUajd+g9LucOnUKRUVF2LlzJ8xmM3Q6HbZv3447d+4EfFxraytSUlJw4sSJDmsDBgz4XXGJ6AcccogobMaPH49z584hMTER0dFd/3r5eUC4ffs2kpKS0KNHD+j1enz//h137tzxvafm3bt3cLlcMBgMAa+vUqng8XiCzqvX61FaWgq32+27m3P79m2/c+x2O9LT05Gfn+879vOdmM6uO378eJw+fRoDBw5EbGxs0JmIKHz4xmMiChubzYaWlhbMnz8fd+/eRUNDAyoqKrBs2TK/IaCxsRHr1q2Dy+XCyZMnsW/fPhQWFgIAkpKSYLVakZubi5s3b8LhcGDRokUYNmwYrFZrwOsnJiaitrYWLpcLzc3Nfm9U7syCBQsgSRJyc3PhdDpRXl6OHTt2+J2TlJSEqqoqVFRU4PHjx9i0aRPu3r37r9dduHAh4uPjYbVacePGDTx79gzXrl3DmjVr8PLly//ytBLRL+KQQ0RhM3ToUNjtdng8HkyfPh1jxozB2rVrERcXh6io//+6ycnJQVtbG0wmE2w2GwoLC7Fy5Urf+pEjR5CSkoLMzEyYzWYIIVBeXo6ePXsGvH5ubi6Sk5ORmpqKAQMGwG63Bzxfq9Xi4sWLqKurg9FoxMaNGzu81LZq1SpkZWVh3rx5SEtLw7t37/zu6nR13V69euH69esYMWIEsrKyoNfrsWLFCrjdbt7ZIYoQSYgfPpyCiOg3mzp1KsaNG4fdu3d3dxQiUjjeySEiIiJF4pBDRIpVUlICrVbb6b+MjIzujkdEvxlfriIixWppaUFLS0unaxqNBsOGDYtwIiKKJA45REREpEh8uYqIiIg
UiUMOERERKRKHHCIiIlIkDjlERESkSBxyiIiISJE45BAREZEiccghIiIiRfofrUlTV1nSkm8AAAAASUVORK5CYII=", "text/plain": [ "
" ] @@ -944,6 +945,14 @@ " iso_df = iso_df.set_index(\"report_date\")\n", " iso_df.plot.bar(color=[\"green\", \"red\", \"grey\"], title=iso_region, stacked=True)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08eb0375-d727-4a76-9803-f8157b199279", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From c6e681deb1b23bae954eefa689610f71e019964e Mon Sep 17 00:00:00 2001 From: bendnorman Date: Thu, 14 Nov 2024 17:49:29 -0900 Subject: [PATCH 17/25] Add healthcheck to prevent app service from starting before postgres db is ready --- docker-compose.yaml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 3dbe0165..8110fced 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,4 +1,3 @@ -version: '3.9' services: app: build: . @@ -7,7 +6,8 @@ services: environment: - API_KEY_GOOGLE_MAPS=${API_KEY_GOOGLE_MAPS} # get this value from our google account: https://console.cloud.google.com/google/maps-apis/credentials?project=dbcp-dev&supportedpurview=project depends_on: - - postgres + postgres: + condition: service_healthy volumes: - ./src/dbcp:/app/dbcp:rw - ./notebooks:/app/notebooks:rw @@ -24,3 +24,8 @@ services: - .env ports: - ${POSTGRES_PORT}:5432 + healthcheck: + test: ["CMD-SHELL", "pg_isready -U youruser"] + interval: 5s + timeout: 5s + retries: 5 From d21e5808b8cedd2c222afdecddf0d3605404c3c5 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Thu, 14 Nov 2024 17:53:52 -0900 Subject: [PATCH 18/25] Add local opposition geocoding error --- src/dbcp/transform/local_opposition.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dbcp/transform/local_opposition.py b/src/dbcp/transform/local_opposition.py index a8f982f6..30d63b23 100644 --- a/src/dbcp/transform/local_opposition.py +++ b/src/dbcp/transform/local_opposition.py @@ -1,4 +1,5 @@ """Transform functions for local opposition data.""" + from typing import Dict import pandas as pd @@ -101,6 
+102,7 @@ def _transform_local_ordinances(local_ord_df: pd.DataFrame) -> pd.DataFrame: "Town of Charlton (Worcester County)": "Charlton (Worcester County)", "City of Owasso (Rogers and Tulsa Counties)": "Owasso (Rogers and Tulsa Counties)", "City of Burleson (Tarrant and Johnson Counties)": "Burleson (Tarrant and Johnson Counties)", + "Montrose City (Genesee County)": "Montrose (Genesee County)", } local.loc[:, "locality"].replace(location_corrections, inplace=True) From a5aafb4139abc07ee63cfcca934c67e9c8292828 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Thu, 14 Nov 2024 18:21:59 -0900 Subject: [PATCH 19/25] Fix geocoding issue and update packages with security issues --- requirements.txt | 5 +++-- src/dbcp/transform/local_opposition.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2072b9a9..5666d6cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ addfips>=0.4 -fiona~=1.9.4 +fiona~=1.10.1 psycopg2~=2.9.3 pytest~=6.2.5 tqdm>=4.64.1,<5.0.0 @@ -8,7 +8,7 @@ googlemaps~=4.5.3 pandas-gbq~=0.19.1 pydata-google-auth~=1.7.0 pandera[io]~=0.8.1 -jupyterlab~=3.2.8 +jupyterlab~=4.3.0 pandas~=1.4.0 joblib~=1.2.0 beautifulsoup4~=4.11 @@ -22,3 +22,4 @@ coloredlogs~=15.0.1 scipy~=1.14.1 google-cloud-storage~=2.18.2 geopandas~=1.0.1 +gcsfs~=2024.10.0 diff --git a/src/dbcp/transform/local_opposition.py b/src/dbcp/transform/local_opposition.py index 7092b253..ff26d66b 100644 --- a/src/dbcp/transform/local_opposition.py +++ b/src/dbcp/transform/local_opposition.py @@ -102,6 +102,7 @@ def _transform_local_ordinances(local_ord_df: pd.DataFrame) -> pd.DataFrame: "Town of Charlton (Worcester County)": "Charlton (Worcester County)", "City of Owasso (Rogers and Tulsa Counties)": "Owasso (Rogers and Tulsa Counties)", "City of Burleson (Tarrant and Johnson Counties)": "Burleson (Tarrant and Johnson Counties)", + "Montrose City (Genesee County)": "Montrose (Genesee County)", } local.loc[:, 
"locality"].replace(location_corrections, inplace=True) From 7dce1a5eb8742a4a97394fd6debc9e9eebd97045 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Fri, 15 Nov 2024 14:50:37 -0500 Subject: [PATCH 20/25] Update BR with 2025-6 data --- .../ballot_ready/ballot_ready_update.ipynb | 283 ++++++++++++++++++ src/dbcp/etl.py | 2 +- src/dbcp/transform/ballot_ready.py | 24 ++ 3 files changed, 308 insertions(+), 1 deletion(-) create mode 100644 notebooks/data_updates/ballot_ready/ballot_ready_update.ipynb diff --git a/notebooks/data_updates/ballot_ready/ballot_ready_update.ipynb b/notebooks/data_updates/ballot_ready/ballot_ready_update.ipynb new file mode 100644 index 00000000..0c12c713 --- /dev/null +++ b/notebooks/data_updates/ballot_ready/ballot_ready_update.ipynb @@ -0,0 +1,283 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from google.cloud import storage" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "# Save old Ballot Ready data locally\n", + "client = storage.Client()\n", + "bucket = client.bucket(\"dgm-archive\")\n", + "blobs = bucket.list_blobs(prefix=\"ballot_ready/Climate Partners_Upcoming Races_All Tiers_20240524.csv\", versions=True)\n", + "for i, blob in enumerate(blobs):\n", + " if i>1:\n", + " exit # There should only be one file that has this name.\n", + " blob.download_to_filename(\"ballot_ready_2024_05_24.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save old Ballot Ready data locally\n", + "client = storage.Client()\n", + "bucket = client.bucket(\"dgm-archive\")\n", + "blobs = bucket.list_blobs(prefix=\"ballot_ready/Climate Partners_Upcoming Races_2025-2026_20240826.csv\", versions=True)\n", + "for i, blob in enumerate(blobs):\n", + " if i>1:\n", 
+ " exit # There should only be one file that has this name.\n", + " blob.download_to_filename(\"ballot_ready_2024_08_26.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_24492/3133798857.py:2: DtypeWarning: Columns (11) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " old_br = pd.read_csv(\"ballot_ready_2024_05_24.csv\")\n" + ] + } + ], + "source": [ + "# Import old Ballot Ready data\n", + "old_br = pd.read_csv(\"ballot_ready_2024_05_24.csv\")\n", + "# Import new Ballot Ready data\n", + "new_br = pd.read_csv(\"ballot_ready_2024_08_26.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty DataFrame\n", + "Columns: [id, election_id, election_name, election_day, race_id, geofence_id, is_primary, is_runoff, is_unexpired, position_id, mtfcc, geo_id, position_name, sub_area_name, sub_area_value, sub_area_name_secondary, sub_area_value_secondary, state, level, tier, is_judicial, is_retention, number_of_seats, normalized_position_id, normalized_position_name, position_description, frequency, reference_year, partisan_type, counties, race_created_at, race_updated_at]\n", + "Index: []\n", + "\n", + "[0 rows x 32 columns]\n", + "Empty DataFrame\n", + "Columns: [id, election_id, election_name, election_day, race_id, geofence_id, is_primary, is_runoff, is_unexpired, position_id, mtfcc, geo_id, position_name, sub_area_name, sub_area_value, sub_area_name_secondary, sub_area_value_secondary, state, level, tier, is_judicial, is_retention, number_of_seats, normalized_position_id, normalized_position_name, position_description, frequency, reference_year, partisan_type, counties, race_created_at, race_updated_at]\n", + "Index: []\n", + "\n", + "[0 rows x 32 columns]\n", + "Empty DataFrame\n", + 
"Columns: [id, election_id, election_name, election_day, race_id, geofence_id, is_primary, is_runoff, is_unexpired, position_id, mtfcc, geo_id, position_name, sub_area_name, sub_area_value, sub_area_name_secondary, sub_area_value_secondary, state, level, tier, is_judicial, is_retention, number_of_seats, normalized_position_id, normalized_position_name, position_description, frequency, reference_year, partisan_type, counties, race_created_at, race_updated_at]\n", + "Index: []\n", + "\n", + "[0 rows x 32 columns]\n" + ] + } + ], + "source": [ + "# Confirm the data doesn't overlap:\n", + "print(new_br[new_br.election_id.isin(old_br.election_id)]) # Elections?\n", + "print(new_br[new_br.race_id.isin(old_br.race_id)]) # Races?\n", + "print(new_br[new_br.election_day.isin(old_br.election_day)]) # Election timespans?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Final election date in old data: 2024-12-14\n", + "First election date in new data: 2025-02-04\n" + ] + } + ], + "source": [ + "# Confirm by looking at dates covered.\n", + "print(f\"Final election date in old data: {old_br.election_day.max()}\")\n", + "print(f\"First election date in new data: {new_br.election_day.min()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "New columns in the new data: set()\n", + "Columns no longer in the new data: set()\n" + ] + } + ], + "source": [ + "# New columns in the data\n", + "print(f\"New columns in the new data: {set(new_br.columns).difference(old_br.columns)}\")\n", + "# Missing columns in the new data - None!\n", + "print(f\"Columns no longer in the new data: {set(old_br.columns).difference(new_br.columns)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are some geographic fields in the data that we aren't 
currently using. The [Ballot Ready](https://support.ballotready.org/interpreting-mtfcc-and-geoid) documentation notes:\n", + "\n", + "\"Mtfcc and geo_id fields should be treated as pairs. Meaning that there could be more than one record in the census file with the same geo_id, but the mtfcc value identifies the type of census entity. BallotReady datasets should be joined to the census file on both the mtfcc and geo_id.\"\n", + "\n", + "\"mtfcc values that start with X will not have any corresponding entry in the census file. These mtfcc/geo_id pairs are for custom boundaries that BallotReady collected, that are not available via the census. Note that there's not one clear explanation about how to use the custom mtfcc values.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 G5420\n", + "1 G5420\n", + "2 X0102\n", + "3 X0102\n", + "4 G5420\n", + "Name: mtfcc, dtype: object" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_br.mtfcc.head(5) # A 5 digit MAF/TIGER feature class code. Those starting with X come from Ballot Ready's research." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2 770\n", + "4 992\n", + "5 64599\n", + "7 102909\n", + "8 2\n", + "9 12\n", + "10 75211\n", + "12 8605\n", + "13 4\n", + "15 14\n", + "16 6\n", + "Name: geo_id, dtype: int64" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# GEO IDs vary in length based on what information they contain.\n", + "# https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html\n", + "new_br.geo_id.str.len().value_counts().sort_index()\n", + "# 2: State FIPS\n", + "# 4: State FIPS + Congressional district\n", + "# 5: State FIPS + County FIPS\n", + "# 7: State FIPS + 5-digit place\n", + "# 8: Not a valid length described by the Census - e.g., 53059.C7 - need to be normalized\n", + "# 9: Not a valid length described by the Census - e.g., 4205-2-13 - need to be normalized\n", + "# 10: State FIPS + County FIPS + County sub-division\n", + "# 12: State FIPS + County FIPS + Tract + Block Group\n", + "# 13: Not a valid length described by the Census - e.g., 53063.8 R/S/B - need to be normalized\n", + "# 15: State FIPS + County FIPS + Tract + Block\n", + "# 16: State FIPS + County FIPS + Tract + Block + Suffix" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Regarding the `geofence_id`, the documentation notes:\n", + "\"Depending on the scope of your export, there can be multiple geofences for the same mtfcc/geo_id pair that are distinguished by the valid_from and valid_to fields. 
That's how we track how the boundaries for a given political jurisdiction can change over time (due to redistricting, annexations, etc.).\"" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_br.set_index(['race_id', 'geofence_id']).index.is_unique" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We don't see `valid_to` and `valid_from` fields in our CSV, and each race is only associated with one `geofence_id` in the data sample. For now, we use these fields to validate our geocoding, but until we need more granular data they don't seem to necessarily serve our use case better than the existing geocoding workflow." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dbcp-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/dbcp/etl.py b/src/dbcp/etl.py index 0c39a4d4..da138a9f 100644 --- a/src/dbcp/etl.py +++ b/src/dbcp/etl.py @@ -149,7 +149,7 @@ def etl_energy_communities_by_county() -> dict[str, pd.DataFrame]: def etl_ballot_ready() -> dict[str, pd.DataFrame]: """ETL Ballot Ready election data.""" - source_uri = "gs://dgm-archive/ballot_ready/Climate Partners_Upcoming Races_All Tiers_20240524.csv" + source_uri = "gs://dgm-archive/ballot_ready/Climate Partners_Upcoming Races_2025-2026_20240826.csv" raw_df = dbcp.extract.ballot_ready.extract(source_uri) transformed = dbcp.transform.ballot_ready.transform(raw_df) return transformed diff --git a/src/dbcp/transform/ballot_ready.py 
b/src/dbcp/transform/ballot_ready.py index a774b3f0..4c72c39b 100644 --- a/src/dbcp/transform/ballot_ready.py +++ b/src/dbcp/transform/ballot_ready.py @@ -198,6 +198,30 @@ def _explode_counties(raw_ballot_ready: pd.DataFrame) -> pd.DataFrame: ballot_ready = pd.concat(valdez_corrections_dfs + [ballot_ready]) + # Validate the geocoding against the GEO IDs that contain a state and county FIPS ID + # GEO IDs vary in length based on what information they contain. + # https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html + # These ones contain both state and county FIPS. Some have letters or non FIPs + # characters, so we shouldn't expect a perfect match. + state_match = ballot_ready.geo_id.str[0:2] == ballot_ready.state_id_fips + logger.info( + f"State FIPS codes:{sum(state_match)} of {len(state_match)} geocoded state FIPS IDs match the Ballot Ready data ({sum(state_match)/len(state_match):.0%})" + ) + assert sum(state_match) / len(state_match) > 0.99 + + # All GEO IDs contain the state FIPS code. 
But only these contain the County FIPS: + # 5 digits: State FIPS + County FIPS + # 10 digits: State FIPS + County FIPS + County sub-division + # 12 digits: State FIPS + County FIPS + Tract + Block Group + # 15 digits: State FIPS + County FIPS + Tract + Block + # 16 digits: State FIPS + County FIPS + Tract + Block + Suffix + geo_ids = ballot_ready.loc[ballot_ready.geo_id.str.len().isin([5, 10, 12, 15, 16])] + county_match = geo_ids.geo_id.str[0:5] == geo_ids.county_id_fips + logger.info( + f"County FIPS codes:{sum(county_match)} of {len(county_match)} geocoded state FIPS IDs match the Ballot Ready data ({sum(county_match)/len(county_match):.0%})" + ) + assert sum(county_match) / len(county_match) > 0.85 + # Drop unused columns ballot_ready = ballot_ready.drop(columns=["position_description"]) ballot_ready = ballot_ready.rename( From 7d2f09343d2857fe43e7a79553c7db857efe9898 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Fri, 15 Nov 2024 14:57:47 -0500 Subject: [PATCH 21/25] Update notebook to make new data inspection easier --- .../ballot_ready/ballot_ready_update.ipynb | 123 +++++++++++++++++- 1 file changed, 119 insertions(+), 4 deletions(-) diff --git a/notebooks/data_updates/ballot_ready/ballot_ready_update.ipynb b/notebooks/data_updates/ballot_ready/ballot_ready_update.ipynb index 0c12c713..eae363c2 100644 --- a/notebooks/data_updates/ballot_ready/ballot_ready_update.ipynb +++ b/notebooks/data_updates/ballot_ready/ballot_ready_update.ipynb @@ -33,9 +33,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/thinky/miniforge3/envs/dbcp-dev/lib/python3.10/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. 
See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", + "/home/thinky/miniforge3/envs/dbcp-dev/lib/python3.10/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + ] + } + ], "source": [ "# Save old Ballot Ready data locally\n", "client = storage.Client()\n", @@ -49,14 +60,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_24492/3133798857.py:2: DtypeWarning: Columns (11) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_69213/1466442055.py:2: DtypeWarning: Columns (11) have mixed types. Specify dtype option on import or set low_memory=False.\n", " old_br = pd.read_csv(\"ballot_ready_2024_05_24.csv\")\n" ] } @@ -257,6 +268,110 @@ "source": [ "We don't see `valid_to` and `valid_from` fields in our CSV, and each race is only associated with one `geofence_id` in the data sample. For now, we use these fields to validate our geocoding, but until we need more granular data they don't seem to necessarily serve our use case better than the existing geocoding workflow." ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Transformed Data\n", + "\n", + "Let's compare the geo ID and the geocoded state and county FIPS columns to ensure geocoding works as expected." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2025-02-04 00:00:00\n", + "2026-12-12 00:00:00\n" + ] + } + ], + "source": [ + "transformed_br = pd.read_parquet('../../../data/output/data_mart/br_election_data.parquet')\n", + "print(transformed_br.election_day.min())\n", + "print(transformed_br.election_day.max())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'old_br' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mold_br\u001b[49m\u001b[38;5;241m.\u001b[39minfo()\n", + "\u001b[0;31mNameError\u001b[0m: name 'old_br' is not defined" + ] + } + ], + "source": [ + "old_br.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 226079 entries, 0 to 226078\n", + "Data columns (total 31 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 race_id 226079 non-null Int64 \n", + " 1 is_primary 226079 non-null boolean \n", + " 2 is_runoff 226079 non-null boolean \n", + " 3 is_unexpired 226079 non-null boolean \n", + " 4 number_of_seats 226079 non-null Int64 \n", + " 5 race_created_at 226079 non-null datetime64[ns]\n", + " 6 race_updated_at 226079 non-null datetime64[ns]\n", + " 7 election_id 226079 non-null Int64 \n", + " 8 position_id 226079 non-null Int64 \n", + " 9 election_name 226079 non-null string \n", + " 10 election_day 226079 non-null datetime64[ns]\n", + " 11 position_name 226079 non-null string \n", + " 12 reference_year 
226076 non-null string \n", + " 13 sub_area_name 90098 non-null string \n", + " 14 sub_area_value 98579 non-null string \n", + " 15 sub_area_name_secondary 5170 non-null string \n", + " 16 sub_area_value_secondary 5669 non-null string \n", + " 17 level 226079 non-null string \n", + " 18 tier 226079 non-null Int64 \n", + " 19 is_judicial 226079 non-null boolean \n", + " 20 is_retention 226079 non-null boolean \n", + " 21 normalized_position_id 226079 non-null Int64 \n", + " 22 normalized_position_name 226079 non-null string \n", + " 23 frequency 226076 non-null string \n", + " 24 partisan_type 225721 non-null string \n", + " 25 county_name 226079 non-null string \n", + " 26 state_name 226079 non-null string \n", + " 27 raw_county 226079 non-null string \n", + " 28 raw_state 226079 non-null string \n", + " 29 state_id_fips 226079 non-null string \n", + " 30 county_id_fips 226079 non-null string \n", + "dtypes: Int64(6), boolean(5), datetime64[ns](3), string(17)\n", + "memory usage: 48.3 MB\n" + ] + } + ], + "source": [ + "transformed_br.info()" + ] } ], "metadata": { From f63cf0eb0dfd1e6551c39cb64dd2062a36d87253 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Fri, 15 Nov 2024 15:18:34 -0500 Subject: [PATCH 22/25] Add precaution against nulls in county_id_fips --- src/dbcp/transform/ballot_ready.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/dbcp/transform/ballot_ready.py b/src/dbcp/transform/ballot_ready.py index 4c72c39b..4e5b2fa5 100644 --- a/src/dbcp/transform/ballot_ready.py +++ b/src/dbcp/transform/ballot_ready.py @@ -215,7 +215,10 @@ def _explode_counties(raw_ballot_ready: pd.DataFrame) -> pd.DataFrame: # 12 digits: State FIPS + County FIPS + Tract + Block Group # 15 digits: State FIPS + County FIPS + Tract + Block # 16 digits: State FIPS + County FIPS + Tract + Block + Suffix - geo_ids = ballot_ready.loc[ballot_ready.geo_id.str.len().isin([5, 10, 12, 15, 16])] + geo_ids = ballot_ready.loc[ + (ballot_ready.geo_id.str.len().isin([5, 
10, 12, 15, 16])) + & (ballot_ready.county_id_fips.notnull()) + ] county_match = geo_ids.geo_id.str[0:5] == geo_ids.county_id_fips logger.info( f"County FIPS codes:{sum(county_match)} of {len(county_match)} geocoded state FIPS IDs match the Ballot Ready data ({sum(county_match)/len(county_match):.0%})" From 8b2dcbe2ed38c17bff1b8583cf5546123ff4f0d2 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Fri, 15 Nov 2024 11:45:12 -0900 Subject: [PATCH 23/25] Add message to geoid ballot ready assertion and use existing helpers in ballot ready notebook --- .../ballot_ready/ballot_ready_update.ipynb | 221 +++++++++--------- src/dbcp/transform/ballot_ready.py | 10 +- 2 files changed, 110 insertions(+), 121 deletions(-) diff --git a/notebooks/data_updates/ballot_ready/ballot_ready_update.ipynb b/notebooks/data_updates/ballot_ready/ballot_ready_update.ipynb index eae363c2..b0ea0d32 100644 --- a/notebooks/data_updates/ballot_ready/ballot_ready_update.ipynb +++ b/notebooks/data_updates/ballot_ready/ballot_ready_update.ipynb @@ -1,13 +1,8 @@ { "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -15,22 +10,6 @@ "from google.cloud import storage" ] }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [], - "source": [ - "# Save old Ballot Ready data locally\n", - "client = storage.Client()\n", - "bucket = client.bucket(\"dgm-archive\")\n", - "blobs = bucket.list_blobs(prefix=\"ballot_ready/Climate Partners_Upcoming Races_All Tiers_20240524.csv\", versions=True)\n", - "for i, blob in enumerate(blobs):\n", - " if i>1:\n", - " exit # There should only be one file that has this name.\n", - " blob.download_to_filename(\"ballot_ready_2024_05_24.csv\")" - ] - }, { "cell_type": "code", "execution_count": 15, @@ -40,48 +19,28 @@ "name": "stderr", "output_type": "stream", "text": [ - 
"/home/thinky/miniforge3/envs/dbcp-dev/lib/python3.10/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", - " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", - "/home/thinky/miniforge3/envs/dbcp-dev/lib/python3.10/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", - " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + "/tmp/ipykernel_33/2772101518.py:9: DtypeWarning: Columns (11) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " old_br = pd.read_csv(old_br_path)\n" ] } ], "source": [ "# Save old Ballot Ready data locally\n", - "client = storage.Client()\n", - "bucket = client.bucket(\"dgm-archive\")\n", - "blobs = bucket.list_blobs(prefix=\"ballot_ready/Climate Partners_Upcoming Races_2025-2026_20240826.csv\", versions=True)\n", - "for i, blob in enumerate(blobs):\n", - " if i>1:\n", - " exit # There should only be one file that has this name.\n", - " blob.download_to_filename(\"ballot_ready_2024_08_26.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_69213/1466442055.py:2: DtypeWarning: Columns (11) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", - " old_br = pd.read_csv(\"ballot_ready_2024_05_24.csv\")\n" - ] - } - ], - "source": [ + "\n", + "from dbcp.extract.helpers import cache_gcs_archive_file_locally\n", + "\n", + "old_br_path = cache_gcs_archive_file_locally(\"gs://dgm-archive/ballot_ready/Climate Partners_Upcoming Races_All Tiers_20240524.csv\")\n", + "new_br_path = cache_gcs_archive_file_locally(\"gs://dgm-archive/ballot_ready/Climate Partners_Upcoming Races_2025-2026_20240826.csv\")\n", + "\n", "# Import old Ballot Ready data\n", - "old_br = pd.read_csv(\"ballot_ready_2024_05_24.csv\")\n", + "old_br = pd.read_csv(old_br_path)\n", "# Import new Ballot Ready data\n", - "new_br = pd.read_csv(\"ballot_ready_2024_08_26.csv\")" + "new_br = pd.read_csv(new_br_path)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -91,18 +50,12 @@ "Empty DataFrame\n", "Columns: [id, election_id, election_name, election_day, race_id, geofence_id, is_primary, is_runoff, is_unexpired, position_id, mtfcc, geo_id, position_name, sub_area_name, sub_area_value, sub_area_name_secondary, sub_area_value_secondary, state, level, tier, is_judicial, is_retention, number_of_seats, normalized_position_id, normalized_position_name, position_description, frequency, reference_year, partisan_type, counties, race_created_at, race_updated_at]\n", "Index: []\n", - "\n", - "[0 rows x 32 columns]\n", "Empty DataFrame\n", "Columns: [id, election_id, election_name, election_day, race_id, geofence_id, is_primary, is_runoff, is_unexpired, position_id, mtfcc, geo_id, position_name, sub_area_name, sub_area_value, sub_area_name_secondary, sub_area_value_secondary, state, level, tier, is_judicial, is_retention, number_of_seats, normalized_position_id, normalized_position_name, position_description, frequency, reference_year, partisan_type, counties, race_created_at, race_updated_at]\n", "Index: []\n", - "\n", - "[0 rows x 32 
columns]\n", "Empty DataFrame\n", "Columns: [id, election_id, election_name, election_day, race_id, geofence_id, is_primary, is_runoff, is_unexpired, position_id, mtfcc, geo_id, position_name, sub_area_name, sub_area_value, sub_area_name_secondary, sub_area_value_secondary, state, level, tier, is_judicial, is_retention, number_of_seats, normalized_position_id, normalized_position_name, position_description, frequency, reference_year, partisan_type, counties, race_created_at, race_updated_at]\n", - "Index: []\n", - "\n", - "[0 rows x 32 columns]\n" + "Index: []\n" ] } ], @@ -115,7 +68,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -135,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -167,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -181,7 +134,7 @@ "Name: mtfcc, dtype: object" ] }, - "execution_count": 37, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -192,7 +145,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -212,7 +165,7 @@ "Name: geo_id, dtype: int64" ] }, - "execution_count": 44, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -244,7 +197,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -253,7 +206,7 @@ "True" ] }, - "execution_count": 56, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -280,15 +233,15 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2025-02-04 00:00:00\n", - "2026-12-12 00:00:00\n" + "2023-02-07 00:00:00\n", + "2024-12-14 00:00:00\n" ] } ], @@ -300,18 +253,52 @@ }, { "cell_type": "code", 
- "execution_count": 13, + "execution_count": 23, "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'old_br' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mold_br\u001b[49m\u001b[38;5;241m.\u001b[39minfo()\n", - "\u001b[0;31mNameError\u001b[0m: name 'old_br' is not defined" + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 247247 entries, 0 to 247246\n", + "Data columns (total 32 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 247247 non-null int64 \n", + " 1 election_id 247247 non-null int64 \n", + " 2 election_name 247247 non-null object \n", + " 3 election_day 247247 non-null object \n", + " 4 race_id 247247 non-null int64 \n", + " 5 geofence_id 246694 non-null float64\n", + " 6 is_primary 247247 non-null object \n", + " 7 is_runoff 247247 non-null object \n", + " 8 is_unexpired 247247 non-null object \n", + " 9 position_id 247247 non-null int64 \n", + " 10 mtfcc 247235 non-null object \n", + " 11 geo_id 247221 non-null object \n", + " 12 position_name 247247 non-null object \n", + " 13 sub_area_name 103388 non-null object \n", + " 14 sub_area_value 114475 non-null object \n", + " 15 sub_area_name_secondary 6619 non-null object \n", + " 16 sub_area_value_secondary 7226 non-null object \n", + " 17 state 247247 non-null object \n", + " 18 level 247247 non-null object \n", + " 19 tier 247247 non-null int64 \n", + " 20 is_judicial 247247 non-null object \n", + " 21 is_retention 247247 non-null object \n", + " 22 number_of_seats 247247 non-null int64 \n", + " 23 normalized_position_id 247247 non-null int64 \n", + " 24 normalized_position_name 247247 non-null object \n", + " 25 
position_description 247203 non-null object \n", + " 26 frequency 246845 non-null object \n", + " 27 reference_year 246845 non-null float64\n", + " 28 partisan_type 247022 non-null object \n", + " 29 counties 247247 non-null object \n", + " 30 race_created_at 247247 non-null object \n", + " 31 race_updated_at 247247 non-null object \n", + "dtypes: float64(2), int64(7), object(23)\n", + "memory usage: 60.4+ MB\n" ] } ], @@ -321,7 +308,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -329,43 +316,43 @@ "output_type": "stream", "text": [ "\n", - "RangeIndex: 226079 entries, 0 to 226078\n", + "RangeIndex: 218874 entries, 0 to 218873\n", "Data columns (total 31 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 race_id 226079 non-null Int64 \n", - " 1 is_primary 226079 non-null boolean \n", - " 2 is_runoff 226079 non-null boolean \n", - " 3 is_unexpired 226079 non-null boolean \n", - " 4 number_of_seats 226079 non-null Int64 \n", - " 5 race_created_at 226079 non-null datetime64[ns]\n", - " 6 race_updated_at 226079 non-null datetime64[ns]\n", - " 7 election_id 226079 non-null Int64 \n", - " 8 position_id 226079 non-null Int64 \n", - " 9 election_name 226079 non-null string \n", - " 10 election_day 226079 non-null datetime64[ns]\n", - " 11 position_name 226079 non-null string \n", - " 12 reference_year 226076 non-null string \n", - " 13 sub_area_name 90098 non-null string \n", - " 14 sub_area_value 98579 non-null string \n", - " 15 sub_area_name_secondary 5170 non-null string \n", - " 16 sub_area_value_secondary 5669 non-null string \n", - " 17 level 226079 non-null string \n", - " 18 tier 226079 non-null Int64 \n", - " 19 is_judicial 226079 non-null boolean \n", - " 20 is_retention 226079 non-null boolean \n", - " 21 normalized_position_id 226079 non-null Int64 \n", - " 22 normalized_position_name 226079 non-null string \n", - " 23 frequency 226076 non-null 
string \n", - " 24 partisan_type 225721 non-null string \n", - " 25 county_name 226079 non-null string \n", - " 26 state_name 226079 non-null string \n", - " 27 raw_county 226079 non-null string \n", - " 28 raw_state 226079 non-null string \n", - " 29 state_id_fips 226079 non-null string \n", - " 30 county_id_fips 226079 non-null string \n", + " 0 race_id 218874 non-null Int64 \n", + " 1 is_primary 218874 non-null boolean \n", + " 2 is_runoff 218874 non-null boolean \n", + " 3 is_unexpired 218874 non-null boolean \n", + " 4 number_of_seats 218874 non-null Int64 \n", + " 5 race_created_at 218874 non-null datetime64[ns]\n", + " 6 race_updated_at 218874 non-null datetime64[ns]\n", + " 7 election_id 218874 non-null Int64 \n", + " 8 position_id 218874 non-null Int64 \n", + " 9 election_name 218874 non-null string \n", + " 10 election_day 218874 non-null datetime64[ns]\n", + " 11 position_name 218874 non-null string \n", + " 12 reference_year 218638 non-null string \n", + " 13 sub_area_name 86507 non-null string \n", + " 14 sub_area_value 96018 non-null string \n", + " 15 sub_area_name_secondary 4684 non-null string \n", + " 16 sub_area_value_secondary 5167 non-null string \n", + " 17 level 218874 non-null string \n", + " 18 tier 218874 non-null Int64 \n", + " 19 is_judicial 218874 non-null boolean \n", + " 20 is_retention 218874 non-null boolean \n", + " 21 normalized_position_id 218874 non-null Int64 \n", + " 22 normalized_position_name 218874 non-null string \n", + " 23 frequency 218638 non-null string \n", + " 24 partisan_type 218663 non-null string \n", + " 25 county_name 218874 non-null string \n", + " 26 state_name 218874 non-null string \n", + " 27 raw_county 218874 non-null string \n", + " 28 raw_state 218874 non-null string \n", + " 29 state_id_fips 218874 non-null string \n", + " 30 county_id_fips 218874 non-null string \n", "dtypes: Int64(6), boolean(5), datetime64[ns](3), string(17)\n", - "memory usage: 48.3 MB\n" + "memory usage: 46.8 MB\n" ] } ], @@ -376,7 
+363,7 @@ ], "metadata": { "kernelspec": { - "display_name": "dbcp-dev", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -390,9 +377,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.15" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/src/dbcp/transform/ballot_ready.py b/src/dbcp/transform/ballot_ready.py index 4e5b2fa5..248af119 100644 --- a/src/dbcp/transform/ballot_ready.py +++ b/src/dbcp/transform/ballot_ready.py @@ -1,4 +1,5 @@ """Module for cleaning Ballot Ready data.""" + import logging import pandas as pd @@ -204,10 +205,11 @@ def _explode_counties(raw_ballot_ready: pd.DataFrame) -> pd.DataFrame: # These ones contain both state and county FIPS. Some have letters or non FIPs # characters, so we shouldn't expect a perfect match. state_match = ballot_ready.geo_id.str[0:2] == ballot_ready.state_id_fips - logger.info( - f"State FIPS codes:{sum(state_match)} of {len(state_match)} geocoded state FIPS IDs match the Ballot Ready data ({sum(state_match)/len(state_match):.0%})" - ) - assert sum(state_match) / len(state_match) > 0.99 + expected_state_match = 0.99 + result_state_match_coverage = sum(state_match) / len(state_match) + assert ( + sum(state_match) / len(state_match) > expected_state_match + ), f"State FIPS codes:{sum(state_match)} of {len(state_match)} geocoded state FIPS IDs match the Ballot Ready data ({result_state_match_coverage:.0%}). Expected atleast {expected_state_match:.0%}" # All GEO IDs contain the state FIPS code. 
But only these contain the County FIPS: # 5 digits: State FIPS + County FIPS From ee950a03b706cdc1724396772fb5d6760a4867d6 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Fri, 15 Nov 2024 11:53:11 -0900 Subject: [PATCH 24/25] Remove unused vars in gridstatus update notebook --- .../gridstatus/quarterly_update.ipynb | 25 +++---------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/notebooks/data_updates/gridstatus/quarterly_update.ipynb b/notebooks/data_updates/gridstatus/quarterly_update.ipynb index b970e934..3631db10 100644 --- a/notebooks/data_updates/gridstatus/quarterly_update.ipynb +++ b/notebooks/data_updates/gridstatus/quarterly_update.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "537fc080-51d5-43fb-8780-33ac2c2a5228", "metadata": {}, "outputs": [ @@ -21,23 +21,6 @@ "In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).\n", " import geopandas as gpd\n" ] - }, - { - "data": { - "text/plain": [ - "{'miso': '1728242350923420',\n", - " 'miso-pre-2017': '1709776311574737',\n", - " 'caiso': '1728242351254356',\n", - " 'pjm': '1728242351606642',\n", - " 'ercot': '1728242351929200',\n", - " 'spp': '1728242352244156',\n", - " 'nyiso': '1728242352584485',\n", - " 'isone': '1728242352913470'}" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -57,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 159, + "execution_count": 6, "id": "a89c74f3-79e6-4d52-b0d9-70b04544b417", "metadata": {}, "outputs": [ @@ -80,6 +63,7 @@ "from datetime import datetime\n", "\n", "def get_generation_number_closest_to_date(bucket_name, blob_name, target_date):\n", + " \"\"\"Find the first 
GCS blob that was modified after the target_date.\"\"\"\n", " client = storage.Client()\n", " bucket = client.bucket(bucket_name)\n", "\n", @@ -87,14 +71,11 @@ " blobs = bucket.list_blobs(prefix=blob_name, versions=True)\n", "\n", " # Filter and sort blobs by time difference\n", - " closest_blob = None\n", - " min_time_diff = float('inf')\n", " target_timestamp = target_date.timestamp()\n", " \n", " blobs = sorted(blobs, key=lambda blob: blob.updated.timestamp())\n", "\n", " for blob in blobs:\n", - " # Get the last modified time and calculate difference from target date\n", " last_modified = blob.updated.timestamp()\n", "\n", " if target_timestamp < last_modified:\n", From 487f4c336d060d21da4b5ba5537477c1e9814182 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Tue, 19 Nov 2024 15:30:28 -0900 Subject: [PATCH 25/25] Update expected number of gs projects --- src/dbcp/transform/gridstatus.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dbcp/transform/gridstatus.py b/src/dbcp/transform/gridstatus.py index 7f62924e..e5b5ec2f 100644 --- a/src/dbcp/transform/gridstatus.py +++ b/src/dbcp/transform/gridstatus.py @@ -950,8 +950,9 @@ def _normalize_project_locations(iso_df: pd.DataFrame) -> pd.DataFrame: duplicate_locations = geocoded_locations[ geocoded_locations[["county_id_fips", "project_id"]].duplicated(keep=False) ] + n_expected_duplicates = 118 assert ( - len(duplicate_locations) <= 116 + len(duplicate_locations) <= n_expected_duplicates ), f"Found more duplicate locations in Grid Status location table than expected:\n {duplicate_locations}" return geocoded_locations