Merge pull request #18 from VolpeUSDOT/RDR_2024_2_1
Upload 2024.2.1 patch files
kzhang81 authored Feb 4, 2025
2 parents bf9f189 + 495b7f7 commit 4df1c4a
Showing 32 changed files with 85,084 additions and 641 deletions.
5 changes: 5 additions & 0 deletions changelog.md
@@ -1,5 +1,10 @@
# RDR Changelog

## v2024_2_1
The RDR 2024.2.1 public release includes a revision to the Benefits Analysis Tool, specifically the TAZ Attribute Overlay tool, which enables the user to overlay geographic attributes onto their TAZs. The tool was formerly called the Equity Overlay tool; because it provides more generalized overlay functionality and can apply any attribute to TAZs, it has been renamed to represent its functionality more accurately.

The previous example/default approach, which applied disadvantaged community metrics, has been replaced with an example/default showing the application of census tract-level poverty metrics.

## v2024_2
The RDR 2024.2 public release includes updates across the entire tool suite, primarily focused on use of publicly available data to generate an RDR analysis. The documentation has been expanded to include public data workflows as well as a new Reference Scenario based on the 2022 Ferndale earthquake. Updated visualizations, particularly in the Tableau workbook, provide better insight into where and under what scenarios benefits are found. Several highlights of the release are detailed below.

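For reference, the thresholds behind the new default attribute, poverty_percentage_bin, can be summarized with a small sketch (not repository code); it mirrors the configuration comments and the case_when logic in TAZ_attribute_overlay.py shown later in this commit.

import pandas as pd

def poverty_bin(povrt):
    """Sketch of the default poverty binning, using the thresholds documented in TAZ_metrics.config."""
    if pd.isna(povrt):     # no modeled poverty estimate for the tract
        return "no data"
    if povrt >= 20.00:     # 20 percent or more of the population in poverty
        return 2
    if povrt >= 10.00:     # 10 to 19.99 percent of the population in poverty
        return 1
    return 0               # less than 10 percent of the population in poverty
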
Binary file not shown.
84,415 changes: 84,415 additions & 0 deletions config/taz_attribute_overlay/poverty.csv

Large diffs are not rendered by default.

Binary file modified documentation/RDR-Tool-Flyer_final.pdf
Binary file not shown.
Binary file modified documentation/RDR_Checklist_final.pdf
Binary file not shown.
Binary file modified documentation/RDR_GettingStarted_final.pdf
Binary file not shown.
Binary file modified documentation/RDR_ScenarioExamples_final.pdf
Binary file not shown.
Binary file modified documentation/RDR_TechnicalDocument_final.pdf
Binary file not shown.
Binary file modified documentation/RDR_UserGuide_final.pdf
Binary file not shown.
4 changes: 2 additions & 2 deletions helper_tools/base_year_run/base_year_run.py
@@ -14,8 +14,8 @@
import rdr_supporting
import rdr_CompileAE

-VERSION_NUMBER = "2024.2"
-VERSION_DATE = "12/16/2024"
+VERSION_NUMBER = "2024.2.1"
+VERSION_DATE = "2/3/2025"
# ---------------------------------------------------------------------------------------------------
# The following code processes an existing scenario configuration to automatically
# generate the outputs of all of the AequilibraE base year runs into one consolidated CSV file
4 changes: 2 additions & 2 deletions helper_tools/baseline_network_run/baseline_network_run.py
@@ -13,8 +13,8 @@
import rdr_setup
import rdr_supporting

-VERSION_NUMBER = "2024.2"
-VERSION_DATE = "12/16/2024"
+VERSION_NUMBER = "2024.2.1"
+VERSION_DATE = "2/3/2025"
# ---------------------------------------------------------------------------------------------------
# The following code generates AequilibraE outputs for a baseline scenario configuration
# with no hazard disruption and no resilience project improvements. Users can run this helper tool
172 changes: 81 additions & 91 deletions helper_tools/benefits_analysis/MetricsByTAZ_categorical.ipynb

Large diffs are not rendered by default.

84 changes: 42 additions & 42 deletions helper_tools/benefits_analysis/MetricsByTAZ_continuous.ipynb

Large diffs are not rendered by default.

196 changes: 196 additions & 0 deletions helper_tools/benefits_analysis/TAZ_attribute_overlay.py
@@ -0,0 +1,196 @@
import os
import pandas as pd
import geopandas as gpd
import datetime
import sys
import urllib.request
import zipfile

# Import code from benefits_analysis_config_reader.py for read_benefits_analysis_config_file method
import benefits_analysis_config_reader

# Import modules from core code (two levels up) by setting path
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'metamodel_py'))

import rdr_setup
import rdr_supporting

def attribute_overlay(cfg, logger):

# Values from config file
output_dir = cfg['benefits_analysis_dir']
run_id = cfg['run_id']
TAZ_col_name = cfg['TAZ_col_name']

TAZ_source = cfg['TAZ_source']
attribute_source = cfg['attribute_source']
attribute_feature = cfg['attribute_feature']
output_name = cfg['output_name']
min_percentile_include = cfg['min_percentile_include']

attribute_crs = cfg['attribute_crs']

# Locate the census tract data, extracting the bundled zip archive if the shapefile is not already present
if attribute_source.strip(' ').lower() == 'censuspoverty':
tracts_path = os.path.join(output_dir, 'census_tracts', 'cb_2023_us_tract_500k.shp')
if not os.path.exists(tracts_path):
tract_zip = os.path.join(os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir)),
'config', 'taz_attribute_overlay', 'cb_2023_us_tract_500k.zip')

tract_dir = os.path.join(output_dir, 'census_tracts')
with zipfile.ZipFile(tract_zip, 'r') as zip_file:
zip_file.extractall(tract_dir)

# Or set the attribute source to the user-provided file
else:
tracts_path = attribute_source

# Read the attribute shapefile, declare its CRS from the config, and reproject to NAD83 (EPSG:4269)
attribute_gdf = gpd.read_file(tracts_path)
attribute_gdf = attribute_gdf.set_crs(attribute_crs)
attribute_gdf = attribute_gdf.to_crs('EPSG:4269')

# If applicable, get default poverty data to join with census tract geometry

# https://www.census.gov/data/experimental-data-products/model-based-estimates-of-2021-persons-in-poverty.html
# https://www2.census.gov/programs-surveys/demo/datasets/model-based-estimates/2021/Tract.csv
# https://mtgis-portal.geo.census.gov/arcgis/apps/experiencebuilder/experience/?id=ad8ad0751e474f938fc98345462cdfbf&page=EDA-Census-Poverty-Status-Viewer&views=Modeled-Tract-Area-Poverty

if attribute_source.strip(' ').lower() == 'censuspoverty':
poverty_filepath = os.path.join(os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir)),
'config', 'taz_attribute_overlay', 'poverty.csv')
poverty = pd.read_csv(poverty_filepath,
usecols=['state', 'county', 'tract', 'povrt'],
converters = {'state':str, 'county':str, 'tract':str})
poverty['GEOID'] = poverty['state'] + poverty['county'] + poverty['tract']
poverty = poverty[["GEOID", "povrt"]]
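# Bin the poverty rate: 2 = 20 percent or more of the population in poverty, 1 = 10 to 19.99 percent,
# 0 = less than 10 percent; tracts without a poverty estimate keep the 'no data' default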
poverty['poverty_percentage_bin'] = pd.Series("no data", index=poverty.index).case_when(
[
(poverty["povrt"] >= 20.00, 2),
(poverty["povrt"] >= 10.00, 1),
(poverty["povrt"] < 10.00, 0)
]
)
attribute_gdf = attribute_gdf[['GEOID', 'geometry']]
attribute_gdf = attribute_gdf.merge(poverty, on = 'GEOID')

# Test for existence of TAZ_source
if not os.path.exists(TAZ_source + '.shp'):
logger.error('The TAZ source file {}.shp could not be found'.format(TAZ_source))
raise Exception("TAZ FILE ERROR: {}.shp could not be found".format(TAZ_source))

# Read in the TAZ shapefile
TAZ_gdf = gpd.read_file(TAZ_source + '.shp')
TAZ_gdf = TAZ_gdf.to_crs('EPSG:4269')

# Rename the user-specified TAZ ID column to 'TAZ'
TAZ_gdf = TAZ_gdf.rename(columns={TAZ_col_name: 'TAZ'})

# Generate the TAZ-attribute intersection shapes
if not os.path.exists(os.path.join(output_dir, "TAZ_attribute_intersect.gpkg")):
logger.info('Intersecting {} with {}'.format(TAZ_source, tracts_path))
TAZ_attribute_intersect = TAZ_gdf.overlay(attribute_gdf, how = 'intersection')
TAZ_attribute_intersect.to_file(os.path.join(output_dir, "TAZ_attribute_intersect.gpkg"))
else:
TAZ_attribute_intersect = gpd.read_file(os.path.join(output_dir, "TAZ_attribute_intersect.gpkg"))

# If a TAZ intersects with multiple attribute areas, there may be multiple values for attribute
# Merge these together using maximum or mean of attribute, excluding very small areas
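# Replace -99999 (likely a no-data code in the source data) with 0 so it does not distort the aggregation below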
TAZ_attribute_intersect = TAZ_attribute_intersect.replace(-99999, 0)

# Minimum area threshold: fragments smaller than this are excluded from the calculation when a TAZ has multiple fragments
min_area = TAZ_attribute_intersect.area.quantile(q = min_percentile_include)

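# Flag TAZs that intersect more than one attribute polygon; after the merge below, the flag appears as the boolean column 'TAZ_y'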
dups = TAZ_attribute_intersect.groupby('TAZ').TAZ.count() > 1

df_taz_dup = TAZ_attribute_intersect.merge(dups, how='left', left_on='TAZ', right_index=True, suffixes=('', '_y'))

# Filter out TAZ fragments which are below minimum size threshold but only if there are multiple fragments per TAZ
df_taz_filter = df_taz_dup.loc[(df_taz_dup.area >= min_area) & (df_taz_dup['TAZ_y'] == True) | (df_taz_dup['TAZ_y'] == False)].copy()

# Check whether the attribute is continuous (heuristic: 20 or more distinct values is treated as continuous)
is_continuous = df_taz_filter[attribute_feature].nunique() >= 20

logger.info('Attribute feature {} is {} type. Continuous variable detected: {}'.format(attribute_feature, df_taz_filter[attribute_feature].dtype, is_continuous))

if is_continuous:
# If the attribute is continuous, take the mean value
df_out = df_taz_filter.groupby('TAZ')[attribute_feature].mean()
else:
# If the attribute is categorical, take the max value
df_out = df_taz_filter.groupby('TAZ')[attribute_feature].max()

# Join back to TAZ data
df_taz = TAZ_gdf

df_taz_attribute = df_taz.merge(df_out, how='left', left_on='TAZ', right_index=True)

# Overwrite blank values that may have arisen from the merge just above, which can happen when the TAZ fragment filtering was too aggressive
if any(pd.isna(df_taz_attribute[attribute_feature])):
logger.info("Blank values are being overwritten in {}. Blank values can arise from a min_percentile_include parameter that is too high.".format(attribute_feature))
df_blanks = df_taz_attribute.loc[pd.isna(df_taz_attribute[attribute_feature]),].copy()
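# Recompute the aggregation from the full (unfiltered) set of intersections so that every TAZ receives a value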
if is_continuous:
df_out = pd.pivot_table(data = TAZ_attribute_intersect, index = 'TAZ', aggfunc = {attribute_feature : "mean"}).reset_index()

else:
df_out = pd.pivot_table(data = TAZ_attribute_intersect, index = 'TAZ', aggfunc = {attribute_feature : "max"}).reset_index()

df_blanks[attribute_feature] = df_blanks.reset_index().merge(df_out, how='left', on='TAZ', suffixes=('_x', ''), ).set_index('index')[attribute_feature]
df_taz_attribute.loc[pd.isna(df_taz_attribute[attribute_feature]), attribute_feature] = df_blanks[attribute_feature]

# Rename the 'TAZ' column back to whatever is specified by the user
df_taz_attribute = df_taz_attribute.rename(columns={'TAZ' : TAZ_col_name})

# Write out gpkg and csv files
logger.info('Writing TAZ attribute overlay geometric file as {} to directory {}'.format(output_name + '.gpkg', output_dir))
df_taz_attribute.to_file(os.path.join(output_dir, output_name + '.gpkg'))

df_taz_attribute = df_taz_attribute.drop(columns = ['geometry'])

logger.info('Writing TAZ attribute overlay CSV file as {} to directory {}'.format(output_name + '.csv', output_dir))
df_taz_attribute.to_csv(os.path.join(output_dir, output_name + '.csv'), index=False)


# ==============================================================================


def main():

start_time = datetime.datetime.now()

program_name = os.path.basename(__file__)

if len(sys.argv) != 2:
print("usage: " + program_name + " <full_path_to_config_file>")
sys.exit()

full_path_to_config_file = sys.argv[1]

if not os.path.exists(full_path_to_config_file):
print("ERROR: config file {} can't be found!".format(full_path_to_config_file))
sys.exit()

cfg = benefits_analysis_config_reader.read_benefits_analysis_config_file(full_path_to_config_file)

output_dir = cfg['benefits_analysis_dir']

# set up logging and report run start time
# ----------------------------------------------------------------------------------------------
logger = rdr_supporting.create_loggers(output_dir, 'TAZ_attribute_overlay', cfg)

logger.info("=======================================================")
logger.info("=========== TAZ ATTRIBUTE OVERLAY STARTING ============")
logger.info("=======================================================")

attribute_overlay(cfg, logger)

end_time = datetime.datetime.now()
total_run_time = end_time - start_time
logger.info("Total run time: {}".format(total_run_time))


# ==============================================================================


if __name__ == "__main__":
main()
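A minimal usage sketch for the new helper tool follows (illustrative only; the config path is hypothetical). It mirrors the steps main() performs; equivalently, the script can be run from the command line as python TAZ_attribute_overlay.py <full_path_to_config_file>.

# Assumes this snippet lives in helper_tools/benefits_analysis alongside TAZ_attribute_overlay.py
import benefits_analysis_config_reader
from TAZ_attribute_overlay import attribute_overlay  # importing this also puts metamodel_py on sys.path
import rdr_supporting

# Hypothetical path; point this at your scenario's copy of TAZ_metrics.config
config_path = r"C:\GitHub\RDR\helper_tools\benefits_analysis\TAZ_metrics.config"

cfg = benefits_analysis_config_reader.read_benefits_analysis_config_file(config_path)
logger = rdr_supporting.create_loggers(cfg['benefits_analysis_dir'], 'TAZ_attribute_overlay', cfg)

# Writes <output_name>.gpkg and <output_name>.csv to the benefits_analysis_dir from the config
attribute_overlay(cfg, logger)
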
45 changes: 25 additions & 20 deletions helper_tools/benefits_analysis/TAZ_metrics.config
@@ -1,7 +1,7 @@

-# Configuration file for running equity_overlay.py and TAZ_metrics.py
+# Configuration file for running TAZ_attribute_overlay.py and TAZ_metrics.py

-# Configuration Summary: To run an equity overlay for an RDR scenario, update the [common] and [equity_overlay] sections to match your scenario specifications
+# Configuration Summary: To run an attribute overlay for an RDR scenario, update the [common] and [attribute_overlay] sections to match your scenario specifications
# Similarly, to run the TAZ metrics analysis for an RDR scenario, update the [common] and [benefits_analysis] sections to match your scenario specifications

# ==============================================================================
@@ -20,34 +20,39 @@ TAZ_col_name = 'TAZ_ID'

# ==============================================================================

-[equity_overlay]
+[attribute_overlay]

# Full file path to the TAZ shapefile (do not include the SHP file extension)
# This is a file stored on the user's local machine
# Future versions may support feature service layer
TAZ_source = "C:\GitHub\RDR\Data\benefits_analysis\inputs\TAZ\TAZ_Layer"

-# Source of the equity layer and the feature of that layer which should be used to categorize areas of equity emphasis
-# If using a file stored on the user's local machine, provide the full path to the shapefile as the equity_source parameter
-# If using the default layer, enter 'CEJST' to use the categorization developed by the Council on Environmental Quality (CEQ)
-# Climate and Economic Justice Screening Tool (CEJST)
-# Note: Default setting provides a binary composite indicator of equity emphasis areas ('SN_C'), where 1 = disadvantaged and 0 = not disadvantaged
-equity_source = 'CEJST'
-equity_feature = 'SN_C'
-
-# Equity source coordinate reference system
-# Defines the coordinate reference system (crs) of the equity source shapefile if provided
+# Source of the attribute layer and the feature of that layer which should be used to assign attribute values to TAZ
+# If using a file stored on the user's local machine, provide the full path to the shapefile as the attribute_source parameter
+# If using the default layer, enter 'censuspoverty' to use bins based on an estimate of the percentage of the population
+# in poverty in each census tract.
+# Within the default data, 0 represents a location with less than 10 percent of the population in poverty,
+# 1 represents a location with 10 - 19.99 percent of the population in poverty, and
+# 2 represents a location with 20 or more percent of the population in poverty.
+# More information is available at these links:
+# https://www.census.gov/data/experimental-data-products/model-based-estimates-of-2021-persons-in-poverty.html
+# https://mtgis-portal.geo.census.gov/arcgis/apps/experiencebuilder/experience/?id=ad8ad0751e474f938fc98345462cdfbf&page=EDA-Census-Poverty-Status-Viewer&views=Modeled-Tract-Area-Poverty
+attribute_source = 'censuspoverty'
+attribute_feature = 'poverty_percentage_bin'
+
+# Attribute source coordinate reference system
+# Defines the coordinate reference system (crs) of the attribute source shapefile if provided
# Typically in the format of 'EPSG:XXXX' where XXXX is four digits
-# Default layer uses the WGS84 Geographic Coordinate System, which is 'EPSG:4326'
-equity_crs = 'EPSG:4326'
+# Default layer uses the NAD83 Geographic Coordinate System, which is 'EPSG:4269'
+attribute_crs = 'EPSG:4269'

# Minimum area to include during overlay
-# Indicates the percentile (as a decimal) of smallest overlay intersected polygons to drop before assigning equity emphasis categories to TAZs
-# Note: Setting this below 0.05 may result in very small fragments of overlay being used to assign equity emphasis category to TAZ
+# Indicates the percentile (as a decimal) of smallest overlay intersected polygons to drop before assigning attribute values to TAZs
+# Note: Setting this below 0.05 may result in very small fragments of overlay being used to assign attribute values to TAZ
min_percentile_include = 0.05

# Name of CSV file created as output (do not include CSV file extension)
-output_name = 'Equity_TAZ_Mapping'
+output_name = 'TAZ_Mapping'

# ==============================================================================

@@ -59,8 +64,8 @@ path_to_RDR_config_file = "C:\GitHub\RDR\Data\sample_run\SampleRun.config"
# Full file path of CSV file containing the TAZ metric data by TAZ
# This file must contain a TAZ ID column named according to the TAZ_col_name parameter above and a TAZ category column
# specified by the TAZ_feature parameter
TAZ_mapping = "C:\GitHub\RDR\Data\benefits_analysis\inputs\Equity_TAZ_Mapping.csv"
TAZ_feature = 'SN_C'
TAZ_mapping = "C:\GitHub\RDR\Data\benefits_analysis\inputs\TAZ_Mapping.csv"
TAZ_feature = 'poverty_percentage_bin'

# Resilience project to analyze in the TAZ metrics helper tool
# Enter the project name corresponding to RDR input files, e.g., 'HighwayProject1'
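A brief illustration (not repository code) of how the min_percentile_include setting is applied by TAZ_attribute_overlay.py above:

# With min_percentile_include = 0.05, the script computes
#   min_area = TAZ_attribute_intersect.area.quantile(q=0.05)
# and drops an intersection fragment only when its area is below min_area AND its TAZ has
# more than one fragment, so a TAZ covered by a single small fragment still keeps that fragment's value.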