From 455317481bed5e101d8dd3e560e31b01f93d5cd6 Mon Sep 17 00:00:00 2001
From: tiffanychu90
Date: Thu, 19 Sep 2024 16:42:53 +0000
Subject: [PATCH 01/11] run gtfs_funnel, hqta, open data for sep2024, remove
 private data suppression from hqta polygons because it's inherited from hqta
 points

---
 _shared_utils/shared_utils/rt_dates.py | 1 +
 gtfs_funnel/logs/download_data.log | 17 +++++++++++++++++
 gtfs_funnel/logs/download_vp_v2.log | 11 +++++++++++
 gtfs_funnel/logs/vp_preprocessing.log | 11 +++++++++++
 gtfs_funnel/update_vars.py | 2 +-
 .../D2_assemble_hqta_polygons.py | 9 +++------
 .../logs/hqta_processing.log | 8 ++++++++
 high_quality_transit_areas/update_vars.py | 2 +-
 open_data/create_stops_data.py | 2 +-
 open_data/update_vars.py | 2 +-
 10 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/_shared_utils/shared_utils/rt_dates.py b/_shared_utils/shared_utils/rt_dates.py
index 98691d675..bcafbfb74 100644
--- a/_shared_utils/shared_utils/rt_dates.py
+++ b/_shared_utils/shared_utils/rt_dates.py
@@ -63,6 +63,7 @@
     "jun2024": "2024-06-12",
     "jul2024": "2024-07-17",
     "aug2024": "2024-08-14",
+    "sep2024": "2024-09-18",
 }
 
 y2023_dates = [
diff --git a/gtfs_funnel/logs/download_data.log b/gtfs_funnel/logs/download_data.log
index 38fd4153f..938fbc549 100644
--- a/gtfs_funnel/logs/download_data.log
+++ b/gtfs_funnel/logs/download_data.log
@@ -516,3 +516,20 @@
 2024-08-15 09:09:27.480 | INFO | __main__:download_one_day:33 - *********** Download st data ***********
 2024-08-15 09:11:56.577 | INFO | __main__:download_one_day:56 - execution time: 0:02:30.991910
 2024-08-15 10:30:38.864 | INFO | __main__:download_one_year:35 - execution time: 0:00:25.978363
+2024-09-19 08:13:46.511 | INFO | __main__:download_one_day:45 - Analysis date: 2024-09-18
+2024-09-19 08:13:49.222 | INFO | __main__:download_one_day:52 - # operators to run: 221
+2024-09-19 08:13:49.223 | INFO | __main__:download_one_day:56 - *********** Download trips data ***********
+2024-09-19 08:14:16.573 | INFO | __main__:download_one_day:86 - execution time: 0:00:30.061230
+2024-09-19 08:14:35.388 | INFO | __main__:download_one_day:22 - Analysis date: 2024-09-18
+2024-09-19 08:14:37.294 | INFO | __main__:download_one_day:29 - # operators to run: 221
+2024-09-19 08:14:37.294 | INFO | __main__:download_one_day:33 - *********** Download stops data ***********
+2024-09-19 08:14:47.392 | INFO | __main__:download_one_day:64 - execution time: 0:00:12.003376
+2024-09-19 08:15:03.834 | INFO | __main__:download_one_day:22 - Analysis date: 2024-09-18
+2024-09-19 08:15:05.784 | INFO | __main__:download_one_day:29 - # operators to run: 221
+2024-09-19 08:15:05.785 | INFO | __main__:download_one_day:33 - *********** Download routelines data ***********
+2024-09-19 08:16:57.558 | INFO | __main__:download_one_day:63 - execution time: 0:01:53.723521
+2024-09-19 08:17:14.221 | INFO | __main__:download_one_day:21 - Analysis date: 2024-09-18
+2024-09-19 08:17:15.854 | INFO | __main__:download_one_day:29 - # operators to run: 190
+2024-09-19 08:17:15.855 | INFO | __main__:download_one_day:33 - *********** Download st data ***********
+2024-09-19 08:19:06.258 | INFO | __main__:download_one_day:56 - execution time: 0:01:52.036660
+2024-09-19 09:28:35.882 | INFO | __main__:download_one_year:35 - execution time: 0:00:45.388883
diff --git a/gtfs_funnel/logs/download_vp_v2.log b/gtfs_funnel/logs/download_vp_v2.log
index 987846da9..e81bf94b5 100644
--- a/gtfs_funnel/logs/download_vp_v2.log
+++ b/gtfs_funnel/logs/download_vp_v2.log
@@ -339,3 +339,14 @@
 2024-08-15 09:29:03.589 | INFO | __main__::112 - export concatenated vp: 0:04:16.418987
 2024-08-15 09:34:04.743 | INFO | __main__::134 - remove batched parquets
 2024-08-15 09:34:04.745 | INFO | __main__::137 - execution time: 0:09:26.469734
+2024-09-19 08:19:35.573 | INFO | __main__::148 - Analysis date: 2024-09-18
+2024-09-19 08:21:52.859 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 0 to GCS: 0:02:17.254015
+2024-09-19 08:23:01.583 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 1 to GCS: 0:01:08.722700
+2024-09-19 08:26:57.364 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 2 to GCS: 0:03:55.780573
+2024-09-19 08:28:55.328 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 3 to GCS: 0:01:57.952237
+2024-09-19 08:28:55.328 | INFO | __main__::155 - execution time: 0:09:19.722825
+2024-09-19 08:29:19.967 | INFO | __main__::97 - Analysis date: 2024-09-18
+2024-09-19 08:29:38.182 | INFO | __main__::105 - concat and filter batched data: 0:00:18.208902
+2024-09-19 08:33:43.251 | INFO | __main__::112 - export concatenated vp: 0:04:05.069147
+2024-09-19 08:37:30.865 | INFO | __main__::134 - remove batched parquets
+2024-09-19 08:37:30.865 | INFO | __main__::137 - execution time: 0:08:10.892310
diff --git a/gtfs_funnel/logs/vp_preprocessing.log b/gtfs_funnel/logs/vp_preprocessing.log
index ccb836743..7b9dddf71 100644
--- a/gtfs_funnel/logs/vp_preprocessing.log
+++ b/gtfs_funnel/logs/vp_preprocessing.log
@@ -200,3 +200,14 @@
 2024-08-15 10:05:01.848 | INFO | __main__::235 - vp with dwell time 2024-08-14: 0:07:09.680694
 2024-08-15 10:13:16.657 | INFO | __main__::120 - 2024-08-14: condense vp for trip 0:07:51.642337
 2024-08-15 10:24:50.802 | INFO | __main__::128 - 2024-08-14: prepare vp to use in nearest neighbor: 0:11:34.144491
+2024-09-19 08:46:17.298 | INFO | __main__::169 - 2024-09-18: pare down vp: 0:02:12.746302
+2024-09-19 08:51:10.542 | INFO | __main__:attach_prior_vp_add_direction:90 - persist vp gddf: 0:04:35.313281
+2024-09-19 08:55:04.346 | INFO | __main__:attach_prior_vp_add_direction:122 - np vectorize arrays for direction: 0:03:53.804190
+2024-09-19 08:55:11.908 | INFO | __main__::194 - 2024-09-18: export vp direction: 0:08:36.678934
+2024-09-19 08:56:33.980 | INFO | __main__::200 - 2024-09-18: export usable vp with direction: 0:01:22.071985
+2024-09-19 08:56:33.981 | INFO | __main__::203 - 2024-09-18: vp_direction script execution time: 0:09:58.750919
+2024-09-19 09:01:58.870 | INFO | __main__::212 - compute dwell df: 0:04:44.983561
+2024-09-19 09:03:13.198 | INFO | __main__::234 - merge with original and export: 0:01:14.327719
+2024-09-19 09:03:13.200 | INFO | __main__::235 - vp with dwell time 2024-09-18: 0:05:59.311280
+2024-09-19 09:08:43.742 | INFO | __main__::120 - 2024-09-18: condense vp for trip 0:05:09.575132
+2024-09-19 09:20:16.936 | INFO | __main__::128 - 2024-09-18: prepare vp to use in nearest neighbor: 0:11:33.194871
diff --git a/gtfs_funnel/update_vars.py b/gtfs_funnel/update_vars.py
index cf98ac2c1..e59bd1b13 100644
--- a/gtfs_funnel/update_vars.py
+++ b/gtfs_funnel/update_vars.py
@@ -11,7 +11,7 @@
 )
 
-analysis_date_list = [rt_dates.DATES["aug2024"]]
+analysis_date_list = [rt_dates.DATES["sep2024"]]
 
 GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")
diff --git a/high_quality_transit_areas/D2_assemble_hqta_polygons.py b/high_quality_transit_areas/D2_assemble_hqta_polygons.py
index 7d68be922..1270c630a 100644
--- a/high_quality_transit_areas/D2_assemble_hqta_polygons.py
+++ b/high_quality_transit_areas/D2_assemble_hqta_polygons.py
@@ -14,8 +14,7 @@
 import C1_prep_pairwise_intersections as prep_clip
 import D1_assemble_hqta_points as assemble_hqta_points
 from calitp_data_analysis import utils, geography_utils
-from D1_assemble_hqta_points import (EXPORT_PATH, add_route_info)
-from shared_utils import gtfs_utils_v2
+from D1_assemble_hqta_points import EXPORT_PATH, add_route_info
 from update_vars import GCS_FILE_PATH, analysis_date, PROJECT_CRS
 
 catalog = intake.open_catalog("*.yml")
@@ -108,9 +107,7 @@ def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     """
     Drop extra columns, get sorting done.
     Used to drop bad stops, but these all look ok.
-    """
-    public_feeds = gtfs_utils_v2.filter_to_public_schedule_gtfs_dataset_keys()
-    
+    """
     keep_cols = [
         "agency_primary", "agency_secondary",
         "hqta_type", "hqta_details", "route_id",
@@ -121,7 +118,7 @@
 
     # Drop bad stops, subset columns
     gdf2 = (
-        gdf[gdf.schedule_gtfs_dataset_key.isin(public_feeds)][keep_cols]
+        gdf[keep_cols]
        .drop_duplicates()
        .sort_values(["hqta_type", "agency_primary",
                      "agency_secondary",
diff --git a/high_quality_transit_areas/logs/hqta_processing.log b/high_quality_transit_areas/logs/hqta_processing.log
index 42d1a4082..66aa62c21 100644
--- a/high_quality_transit_areas/logs/hqta_processing.log
+++ b/high_quality_transit_areas/logs/hqta_processing.log
@@ -134,3 +134,11 @@
 2024-08-15 10:13:09.237 | INFO | __main__::163 - C3_create_bus_hqta_types 2024-08-14 execution time: 0:00:39.499192
 2024-08-15 10:14:06.867 | INFO | __main__::297 - D1_assemble_hqta_points 2024-08-14 execution time: 0:00:34.144908
 2024-08-15 10:15:08.381 | INFO | __main__::167 - D2_assemble_hqta_polygons 2024-08-14 execution time: 0:00:37.945649
+2024-09-19 09:22:34.969 | INFO | __main__::354 - A1_rail_ferry_brt_stops 2024-09-18 execution time: 0:01:17.399227
+2024-09-19 09:28:56.027 | INFO | __main__::249 - B1_create_hqta_segments execution time: 0:05:58.743890
+2024-09-19 09:30:06.095 | INFO | __main__::256 - B2_sjoin_stops_to_segments 2024-09-18 execution time: 0:00:49.770142
+2024-09-19 09:30:32.509 | INFO | __main__::142 - C1_prep_pairwise_intersections 2024-09-18 execution time: 0:00:08.451303
+2024-09-19 09:31:37.803 | INFO | __main__::125 - C2_find_intersections 2024-09-18 execution time: 0:00:37.681126
+2024-09-19 09:32:36.082 | INFO | __main__::163 - C3_create_bus_hqta_types 2024-09-18 execution time: 0:00:37.486499
+2024-09-19 09:33:22.863 | INFO | __main__::307 - D1_assemble_hqta_points 2024-09-18 execution time: 0:00:27.668799
+2024-09-19 09:36:35.489 | INFO | __main__::167 - D2_assemble_hqta_polygons 2024-09-18 execution time: 0:00:26.678607
diff --git a/high_quality_transit_areas/update_vars.py b/high_quality_transit_areas/update_vars.py
index 46046c8be..5de2605d6 100644
--- a/high_quality_transit_areas/update_vars.py
+++ b/high_quality_transit_areas/update_vars.py
@@ -1,6 +1,6 @@
 from shared_utils import rt_dates
 
-analysis_date = rt_dates.DATES["aug2024"]
+analysis_date = rt_dates.DATES["sep2024"]
 
 GCS_FILE_PATH = ("gs://calitp-analytics-data/data-analyses/"
                  "high_quality_transit_areas/")
diff --git a/open_data/create_stops_data.py b/open_data/create_stops_data.py
index 0a4fccea1..aa43e3442 100644
--- a/open_data/create_stops_data.py
+++ b/open_data/create_stops_data.py
@@ -105,7 +105,7 @@ def create_stops_file_for_export(date: str) -> gpd.GeoDataFrame:
     stop_times = helpers.import_scheduled_stop_times(
         date,
         columns = prep_traffic_ops.keep_stop_time_cols,
-        get_panda = True
+        get_pandas = True
     )
 
     stops_assembled = attach_route_info_to_stops(stops, trips, stop_times)
diff --git a/open_data/update_vars.py b/open_data/update_vars.py
index b68e3d2bb..b8364cd71 100644
--- a/open_data/update_vars.py
+++ b/open_data/update_vars.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 from shared_utils import rt_dates
 
-analysis_date = rt_dates.DATES["aug2024"]
+analysis_date = rt_dates.DATES["sep2024"]
 
 GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/"
 COMPILED_CACHED_VIEWS = f"{GCS_FILE_PATH}rt_delay/compiled_cached_views/"

From 9726684a062a2a4d4dc2efa348d8a9fd822a604e Mon Sep 17 00:00:00 2001
From: tiffanychu90
Date: Thu, 19 Sep 2024 17:33:23 +0000
Subject: [PATCH 02/11] initial publish of operators by most recent date

---
 gtfs_funnel/Makefile | 3 +-
 gtfs_funnel/published_operators.yml | 235 ++++++++++++++++++++++++++++
 gtfs_funnel/track_publish_dates.py | 85 ++++++++++
 gtfs_funnel/update_vars.py | 4 +-
 4 files changed, 325 insertions(+), 2 deletions(-)
 create mode 100644 gtfs_funnel/published_operators.yml
 create mode 100644 gtfs_funnel/track_publish_dates.py

diff --git a/gtfs_funnel/Makefile b/gtfs_funnel/Makefile
index 1389e9011..6fb981b36 100644
--- a/gtfs_funnel/Makefile
+++ b/gtfs_funnel/Makefile
@@ -30,7 +30,8 @@ route_typologies_data:
 # Clean route names for displaying across time
 timeseries_preprocessing:
 	python clean_route_naming.py
-
+	python track_publish_dates.py
+
 # monthly scheduled service, download after the end of each month
 monthly_scheduled_data:
 	python download_monthly_service.py
diff --git a/gtfs_funnel/published_operators.yml b/gtfs_funnel/published_operators.yml
new file mode 100644
index 000000000..a29fa245e
--- /dev/null
+++ b/gtfs_funnel/published_operators.yml
@@ -0,0 +1,235 @@
+2024-09-18:
+ - Alhambra Schedule
+ - Amador Schedule
+ - Anaheim Resort Schedule
+ - Anaheim Resort Schedule v2
+ - Antelope Valley Transit Authority Schedule
+ - Arcadia Schedule
+ - Arvin Schedule
+ - Auburn Schedule
+ - B-Line Schedule
+ - Baldwin Park Schedule
+ - Banning Pass Schedule
+ - Bay Area 511 AC Transit Schedule
+ - Bay Area 511 ACE Schedule
+ - Bay Area 511 Angel Island-Tiburon Ferry Schedule
+ - Bay Area 511 BART Schedule
+ - Bay Area 511 Caltrain Schedule
+ - Bay Area 511 Capitol Corridor Schedule
+ - Bay Area 511 Commute.org Schedule
+ - Bay Area 511 County Connection Schedule
+ - Bay Area 511 Dumbarton Express Schedule
+ - Bay Area 511 Emery Go-Round Schedule
+ - Bay Area 511 Fairfield and Suisun Transit Schedule
+ - Bay Area 511 Golden Gate Ferry Schedule
+ - Bay Area 511 Golden Gate Transit Schedule
+ - Bay Area 511 MVGO Schedule
+ - Bay Area 511 Marin Schedule
+ - Bay Area 511 Mission Bay Schedule
+ - Bay Area 511 Muni Schedule
+ - Bay Area 511 Petaluma Schedule
+ - Bay Area 511 Rio Vista Delta Breeze Schedule
+ - Bay Area 511 SFO AirTrain Schedule
+ - Bay Area 511 SamTrans Schedule
+ - Bay Area 511 San Francisco Bay Ferry Schedule
+ - Bay Area 511 Santa Clara Transit Schedule
+ - Bay Area 511 Santa Rosa CityBus Schedule
+ - Bay Area 511 SolTrans Schedule
+ - Bay Area 511 Sonoma County Transit Schedule
+ - Bay Area 511 Sonoma-Marin Area Rail Transit Schedule
+ - Bay Area 511 South San Francisco Shuttle Schedule
+ - Bay Area 511 Treasure Island Ferry Schedule
+ - Bay Area 511 Tri Delta Schedule
+ - Bay Area 511 Tri-Valley Wheels Schedule
+ - Bay Area 511 Union City Transit Schedule
+ - Bay Area 511 Vacaville City Coach Schedule
+ - Bay Area 511 Vine Transit Schedule
+ - Bay Area 511 WestCAT Schedule
+ - Beach Cities GMV Schedule
+ - Bear Schedule
+ - Beaumont Pass Schedule
+ - Bell Gardens Schedule
+ - Bellflower Bus Schedule
+ - Big Blue Bus Schedule
+ - Big Blue Bus Swiftly Schedule
+ - BruinBus Schedule
+ - Burbank Schedule
+ - Calabasas Schedule
+ - Calaveras Schedule
+ - Cerritos on Wheels Schedule
+ - Cerritos on Wheels Website Schedule
+ - Clean Air Express Schedule
+ - Clovis Schedule
+ - Commerce Schedule
+ - Corona Schedule
+ - County Express Schedule
+ - Cudahy Schedule
+ - Culver City Schedule
+ - Curry Public Transit Schedule
+ - Dana Point Trolley Schedule
+ - Delano Schedule
+ - Desert Roadrunner GMV Schedule
+ - Desert Roadrunner Schedule
+ - DowneyLINK GMV Schedule
+ - Eastern Sierra Schedule
+ - El Dorado Schedule
+ - El Monte Schedule
+ - Elk Grove Schedule
+ - Flixbus Schedule
+ - Foothill Schedule
+ - Fresno County Schedule
+ - Fresno Schedule
+ - G Trans Schedule
+ - GET Schedule
+ - Get Around Town Express Schedule
+ - Glendale Schedule
+ - Glendora Schedule
+ - Glenn Schedule
+ - Go West Schedule
+ - Grapeline Schedule
+ - Guadalupe Flyer Schedule
+ - Havasu Landing Ferry Schedule
+ - Humboldt Schedule
+ - Huntington Schedule
+ - Imperial Valley Transit Schedule
+ - Inglewood Schedule
+ - Irvine CONNECT Schedule
+ - Kern Schedule
+ - Kings Schedule
+ - LA DOT Schedule
+ - LA Metro Bus Schedule
+ - LA Metro Rail Schedule
+ - LADPW Schedule
+ - LAX FlyAway Schedule
+ - LAX Flyaway Bus Schedule
+ - LAX Shuttles Schedule
+ - La Campana Schedule
+ - La Puente Schedule
+ - Laguna Beach Schedule
+ - Lake Schedule
+ - Lassen Schedule
+ - Lawndale Beat GMV Schedule
+ - Lawndale Schedule
+ - Lompoc Schedule
+ - Long Beach Schedule
+ - Lynwood Schedule IPS
+ - MV Shuttle Schedule
+ - Madera County Connection Schedule
+ - Madera Metro Schedule
+ - Mariposa Grove Shuttle Schedule
+ - Maywood Schedule
+ - Mendocino Schedule
+ - Merced GMV Schedule
+ - Merced Schedule
+ - Metrolink Schedule
+ - Montebello Schedule
+ - Monterey Salinas Schedule
+ - Morongo Basin Schedule
+ - Morro Bay Cal-ITP Schedule
+ - Mountain Transit GMV Schedule
+ - Mountain Transit Schedule
+ - Needles Schedule
+ - Nevada County Schedule
+ - North County Schedule
+ - Norwalk Avail Schedule
+ - OCTA Schedule
+ - OmniTrans Schedule
+ - Oregon POINT
+ - Palos Verdes PTA Schedule
+ - Pasadena Schedule
+ - Placer Schedule
+ - Plumas Schedule
+ - PresidiGo Schedule
+ - Redding Schedule
+ - Redwood Coast Schedule
+ - Riverside Schedule
+ - Rosemead Passio Schedule
+ - Roseville Schedule
+ - Roseville Transit GMV Schedule
+ - SBMTD Schedule
+ - SLO Schedule
+ - SLORTA Schedule
+ - Sage Stage Schedule
+ - San Clemente Trolley Schedule
+ - San Diego Schedule
+ - San Fernando Schedule
+ - San Joaquin Schedule
+ - San Juan Capistrano Trolley Schedule
+ - Santa Clarita Schedule
+ - Santa Maria Schedule
+ - Santa Ynez Mecatran Schedule
+ - Sierra Madre Schedule
+ - Siskiyou Schedule
+ - South County Transit Link Schedule
+ - South San Francisco Schedule
+ - Spirit Bus Passio Schedule
+ - StanRTA Schedule
+ - Stanford Schedule
+ - SunLine Avail Schedule
+ - 'TART, North Lake Tahoe Schedule'
+ - TCRTA TripShot Schedule
+ - Tahoe Transportation District GMV Schedule
+ - Tahoe Transportation District Schedule
+ - Tehama Schedule
+ - Torrance Schedule
+ - Tracy Schedule
+ - Trinity Schedule
+ - Tuolumne Remix Schedule
+ - Turlock Schedule
+ - UCSC Schedule
+ - Unitrans Schedule
+ - VCTC GMV Schedule
+ - Victor Valley GMV Schedule
+ - Victor Valley Schedule
+ - Visalia Schedule
+ - WeHo Schedule
+ - YARTS Schedule
+ - Yolobus Schedule
+ - Yosemite Valley Shuttle Schedule
+ - Yuba-Sutter Schedule
+ - Yuma Schedule
+ - eTrans Schedule
+2024-08-14:
+ - Santa Cruz Schedule
+2024-06-12:
+ - Anteater Express Schedule
+ - Lassen Flex
+ - Lynwood Schedule
+ - Manteca Schedule
+2024-05-22:
+ - El Segundo Schedule
+ - Redwood Coast Schedulel
+2024-04-17:
+ - Sacramento Schedule
+2024-03-13:
+ - Avalon Schedule
+2024-02-14:
+ - Rosemead Schedule
+2023-12-13:
+ - DowneyLINK Schedule
+ - Humboldt Flex
+ - Laguna Beach Flex
+ - Manteca Flex
+ - Placer Flex
+ - San Joaquin Flex
+ - Spirit Bus Schedule
+ - StanRTA Flex
+ - TART Flex
+ - Thousand Oaks Flex
+ - Tracy Flex
+ - Turlock Flex
+ - Union City Flex
+ - VCTC Flex
+ - WestCAT Flex
+2023-11-15:
+ - Amtrak Schedule
+ - Mission Bay Schedule
+2023-08-15:
+ - Blossom Express Schedule
+ - Eastern Sierra Flex
+2023-06-14:
+ - Tuolumne Schedule
+2023-04-12:
+ - Guadalupe Flex
+2023-03-15:
+ - TIME GMV Schedule
diff --git a/gtfs_funnel/track_publish_dates.py b/gtfs_funnel/track_publish_dates.py
new file mode 100644
index 000000000..4cf6d90e1
--- /dev/null
+++ b/gtfs_funnel/track_publish_dates.py
@@ -0,0 +1,85 @@
+"""
+Grab all the operators by service date from
+saved scheduled_trips tables from GCS.
+
+Create a yaml that tells us the most recent
+date available for each operator (schedule_gtfs_dataset_name).
+"""
+import pandas as pd
+import pyaml # use pyaml because it gets us prettier indents than yaml
+
+from pathlib import Path
+from typing import Union
+
+from shared_utils import rt_dates
+from segment_speed_utils import time_series_utils
+
+def filter_to_recent_date(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    By schedule_gtfs_dataset_name, keep the most recent
+    service_date that shows up in scheduled trips.
+    """
+    df2 = (df.groupby("name", group_keys=False)
+           .service_date
+           .max()
+           .reset_index()
+           .sort_values(["service_date", "name"], ascending=[False, True])
+           .reset_index(drop=True)
+           .astype({"service_date": "str"})
+          )
+    return df2
+
+def export_results_yml(
+    df: pd.DataFrame,
+    export_yaml: Union[str, Path]
+):
+    """
+    Save out our results from df.
+    Convert df into a dictionary and save out dictionary results as yaml.
+    """
+    # TODO: check this list manually and there will be some
+    # operator names that have more recent names that we are keeping,
+    # so we can remove these from our yaml
+    exclude_me = [
+        "TIME GMV"
+    ]
+
+    df2 = df[~df.name.isin(exclude_me)]
+
+    my_dict = {
+        **{
+            date_key: df2[df2.service_date==date_key].name.tolist()
+            for date_key in df2.service_date.unique()
+        }
+    }
+
+    # sort_keys=False to prevent alphabetical sort (earliest date first)
+    # because we want to maintain our results and yaml with most recent date first
+    output = pyaml.dump(my_dict, sort_keys=False)
+
+    with open(export_yaml, "w") as f:
+        f.write(output)
+
+    print(f"{export_yaml} exported")
+
+    return
+
+
+if __name__ == "__main__":
+
+    from update_vars import (GTFS_DATA_DICT,
+                             COMPILED_CACHED_VIEWS,
+                             PUBLISHED_OPERATORS_YAML)
+
+    TABLE = GTFS_DATA_DICT.schedule_downloads.trips
+
+    operators = time_series_utils.concatenate_datasets_across_dates(
+        COMPILED_CACHED_VIEWS,
+        TABLE,
+        rt_dates.y2024_dates + rt_dates.y2023_dates,
+        data_type = "df",
+        get_pandas = True,
+        columns = ["name"]
+    ).drop_duplicates().pipe(filter_to_recent_date)
+
+    export_results_yml(operators, PUBLISHED_OPERATORS_YAML)
\ No newline at end of file
diff --git a/gtfs_funnel/update_vars.py b/gtfs_funnel/update_vars.py
index e59bd1b13..02828ae30 100644
--- a/gtfs_funnel/update_vars.py
+++ b/gtfs_funnel/update_vars.py
@@ -21,4 +21,6 @@
 SCHED_GCS = GTFS_DATA_DICT.gcs_paths.SCHED_GCS
 SHARED_GCS = GTFS_DATA_DICT.gcs_paths.SHARED_GCS
 
-ntd_latest_year = 2022
\ No newline at end of file
+ntd_latest_year = 2022
+
+PUBLISHED_OPERATORS_YAML = "published_operators.yml"
\ No newline at end of file

From 9d4c93ff30b85ee0acc30a1cd9ff6c8984d1c40e Mon Sep 17 00:00:00 2001
From: tiffanychu90
Date: Thu, 19 Sep 2024 19:34:04 +0000
Subject: [PATCH 03/11] add export path to update_vars for hqta

---
 .../check2_hq_corridors.ipynb | 6 ++---
 .../check3_hqta_points.ipynb | 26 +++++++++++++++++--
 high_quality_transit_areas/update_vars.py | 3 ++-
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/high_quality_transit_areas/check2_hq_corridors.ipynb b/high_quality_transit_areas/check2_hq_corridors.ipynb
index c4440b9b0..eba631e67 100644
--- a/high_quality_transit_areas/check2_hq_corridors.ipynb
+++ b/high_quality_transit_areas/check2_hq_corridors.ipynb
@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "ecdd335a-be94-4a11-aaca-24a43a3b9756",
    "metadata": {},
    "outputs": [],
@@ -34,7 +34,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "4fa137db-08d5-4822-9bdd-46919ee0da7f",
    "metadata": {},
    "outputs": [],
@@ -45,7 +45,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "1e383a39-3810-41f6-a366-985126b335db",
    "metadata": {},
    "outputs": [],
diff --git a/high_quality_transit_areas/check3_hqta_points.ipynb b/high_quality_transit_areas/check3_hqta_points.ipynb
index 42e7d19f6..7869c49f4 100644
--- a/high_quality_transit_areas/check3_hqta_points.ipynb
+++ b/high_quality_transit_areas/check3_hqta_points.ipynb
@@ -52,7 +52,7 @@
    "    if \"service_date\" in gdf.columns:\n",
    "        gdf = gdf.drop(columns = \"service_date\")\n",
    "    \n",
-   "    m = gdf.explore(plot_col, categorical=True, tiles = TILES)\n",
+   "    m = gdf.explore(plot_col, categorical=True, tiles = TILES, legend=True)\n",
    "    \n",
    "    display(m)"
   ]
@@ -171,7 +171,29 @@
    "id": "41dcbec2-16a9-4d56-9f30-9b2e83bd2741",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
"gdf[gdf.agency_primary.str.contains(\"Santa Monica\")].base64_url_primary.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ace4244c-b351-4259-b8bc-75e1d0105a58", + "metadata": {}, + "outputs": [], + "source": [ + "gdf[gdf.agency_primary.str.contains(\"Santa Monica\")]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2029272f-7d46-4d11-9918-357c43253128", + "metadata": {}, + "outputs": [], + "source": [ + "agency_primary" + ] } ], "metadata": { diff --git a/high_quality_transit_areas/update_vars.py b/high_quality_transit_areas/update_vars.py index 5de2605d6..dffb41557 100644 --- a/high_quality_transit_areas/update_vars.py +++ b/high_quality_transit_areas/update_vars.py @@ -7,4 +7,5 @@ TEMP_GCS = f"{GCS_FILE_PATH}temp/" PROJECT_CRS = "EPSG:3310" HQTA_SEGMENT_LENGTH = 1_250 # meters -BUFFER_METERS = 50 \ No newline at end of file +BUFFER_METERS = 50 +EXPORT_PATH = f"{GCS_FILE_PATH}export/{analysis_date}/" \ No newline at end of file From 606000e2bb15a67a2c9966d3796b3e95a406a3ea Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 19 Sep 2024 19:48:35 +0000 Subject: [PATCH 04/11] run segment speeds for sep2024 --- rt_segment_speeds/logs/avg_speeds.log | 9 +++++++++ rt_segment_speeds/logs/cut_stop_segments.log | 2 ++ rt_segment_speeds/logs/interpolate_stop_arrival.log | 2 ++ rt_segment_speeds/logs/nearest_vp.log | 6 ++++++ rt_segment_speeds/logs/speeds_by_segment_trip.log | 3 +++ rt_segment_speeds/segment_speed_utils/project_vars.py | 2 +- 6 files changed, 23 insertions(+), 1 deletion(-) diff --git a/rt_segment_speeds/logs/avg_speeds.log b/rt_segment_speeds/logs/avg_speeds.log index dd0382eaf..816292e0c 100644 --- a/rt_segment_speeds/logs/avg_speeds.log +++ b/rt_segment_speeds/logs/avg_speeds.log @@ -446,3 +446,12 @@ 2024-08-15 12:49:58.589 | INFO | average_segment_speeds:single_day_segment_averages:173 - shape seg avg 0:05:18.197146 2024-08-15 12:53:37.977 | INFO | average_segment_speeds:single_day_segment_averages:189 - route dir seg avg 0:03:39.388630 2024-08-15 12:53:37.978 | INFO | average_segment_speeds:single_day_segment_averages:190 - single day segment 2024-08-14 execution time: 0:08:57.585776 +2024-09-19 11:41:23.528 | INFO | __main__:single_day_segment_averages:173 - shape seg avg 0:04:35.831329 +2024-09-19 11:44:21.408 | INFO | __main__:single_day_segment_averages:189 - route dir seg avg 0:02:57.880787 +2024-09-19 11:44:21.409 | INFO | __main__:single_day_segment_averages:190 - single day segment 2024-09-18 execution time: 0:07:33.712116 +2024-09-19 12:25:35.942 | INFO | __main__:single_day_summary_averages:90 - trip avg 0:00:15.246328 +2024-09-19 12:25:48.332 | INFO | __main__:single_day_summary_averages:132 - route dir avg: 0:00:12.390061 +2024-09-19 12:25:48.333 | INFO | __main__:single_day_summary_averages:133 - single day summary speed 2024-09-18 execution time: 0:00:27.636389 +2024-09-19 12:42:38.061 | INFO | average_segment_speeds:single_day_segment_averages:173 - shape seg avg 0:05:30.126380 +2024-09-19 12:46:50.506 | INFO | average_segment_speeds:single_day_segment_averages:189 - route dir seg avg 0:04:12.445389 +2024-09-19 12:46:50.507 | INFO | average_segment_speeds:single_day_segment_averages:190 - single day segment 2024-09-18 execution time: 0:09:42.571769 diff --git a/rt_segment_speeds/logs/cut_stop_segments.log b/rt_segment_speeds/logs/cut_stop_segments.log index 655030839..39bf1068a 100644 --- a/rt_segment_speeds/logs/cut_stop_segments.log +++ b/rt_segment_speeds/logs/cut_stop_segments.log @@ -45,3 
+45,5 @@ 2024-07-30 21:16:51.780 | INFO | __main__::244 - speedmap segments and proxy_stop_times 2023-04-16: 0:02:19.401564 2024-08-15 10:55:47.957 | INFO | __main__::155 - cut segments 2024-08-14: 0:21:24.282441 2024-08-15 11:01:37.861 | INFO | __main__::244 - speedmap segments and proxy_stop_times 2024-08-14: 0:04:20.718384 +2024-09-19 10:45:10.417 | INFO | __main__::155 - cut segments 2024-09-18: 0:22:12.922031 +2024-09-19 10:51:18.211 | INFO | __main__::244 - speedmap segments and proxy_stop_times 2024-09-18: 0:04:36.568172 diff --git a/rt_segment_speeds/logs/interpolate_stop_arrival.log b/rt_segment_speeds/logs/interpolate_stop_arrival.log index 1b3b751e4..29cc7fc4a 100644 --- a/rt_segment_speeds/logs/interpolate_stop_arrival.log +++ b/rt_segment_speeds/logs/interpolate_stop_arrival.log @@ -99,3 +99,5 @@ 2024-08-15 11:46:45.773 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for stop_segments 2024-08-14: 2024-08-14: 0:13:26.403842 2024-08-15 12:31:36.711 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for rt_stop_times 2024-08-14: 2024-08-14: 0:14:13.913489 2024-08-15 12:42:32.459 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for speedmap_segments 2024-08-14: 2024-08-14: 0:02:27.666741 +2024-09-19 11:34:46.012 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for stop_segments 2024-09-18: 2024-09-18: 0:15:34.067479 +2024-09-19 12:22:50.153 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for rt_stop_times 2024-09-18: 2024-09-18: 0:15:01.401473 diff --git a/rt_segment_speeds/logs/nearest_vp.log b/rt_segment_speeds/logs/nearest_vp.log index ff4f162c2..bcb9357da 100644 --- a/rt_segment_speeds/logs/nearest_vp.log +++ b/rt_segment_speeds/logs/nearest_vp.log @@ -201,3 +201,9 @@ 2024-08-15 12:17:22.737 | INFO | vp_around_stops:filter_to_nearest_two_vp:247 - nearest 2 vp for rt_stop_times 2024-08-14: 0:09:34.224293 2024-08-15 12:36:39.952 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for speedmap_segments 2024-08-14: 0:02:25.873838 2024-08-15 12:40:04.733 | INFO | vp_around_stops:filter_to_nearest_two_vp:247 - nearest 2 vp for speedmap_segments 2024-08-14: 0:03:24.363193 +2024-09-19 11:06:22.823 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for stop_segments 2024-09-18: 0:13:00.998435 +2024-09-19 11:19:11.854 | INFO | vp_around_stops:filter_to_nearest_two_vp:247 - nearest 2 vp for stop_segments 2024-09-18: 0:09:50.183557 +2024-09-19 11:57:34.800 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for rt_stop_times 2024-09-18: 0:12:19.977712 +2024-09-19 12:07:48.692 | INFO | vp_around_stops:filter_to_nearest_two_vp:247 - nearest 2 vp for rt_stop_times 2024-09-18: 0:10:11.973530 +2024-09-19 12:28:39.454 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for speedmap_segments 2024-09-18: 0:02:33.742427 +2024-09-19 12:32:09.310 | INFO | vp_around_stops:filter_to_nearest_two_vp:247 - nearest 2 vp for speedmap_segments 2024-09-18: 0:03:29.417591 diff --git a/rt_segment_speeds/logs/speeds_by_segment_trip.log b/rt_segment_speeds/logs/speeds_by_segment_trip.log index a7e368136..18527e3c9 100644 --- a/rt_segment_speeds/logs/speeds_by_segment_trip.log +++ b/rt_segment_speeds/logs/speeds_by_segment_trip.log @@ -110,3 +110,6 @@ 2024-08-15 11:48:10.483 | INFO | 
stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:176 - speeds by segment for stop_segments 2024-08-14: 0:01:24.614156 2024-08-15 12:33:12.660 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:176 - speeds by segment for rt_stop_times 2024-08-14: 0:01:35.909290 2024-08-15 12:44:22.889 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:176 - speeds by segment for speedmap_segments 2024-08-14: 0:01:41.398512 +2024-09-19 11:36:29.235 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:176 - speeds by segment for stop_segments 2024-09-18: 0:01:43.166792 +2024-09-19 12:25:01.693 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:176 - speeds by segment for rt_stop_times 2024-09-18: 0:02:11.499303 +2024-09-19 12:36:50.740 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:176 - speeds by segment for speedmap_segments 2024-09-18: 0:01:46.975907 diff --git a/rt_segment_speeds/segment_speed_utils/project_vars.py b/rt_segment_speeds/segment_speed_utils/project_vars.py index 7d375c817..3c9f7a259 100644 --- a/rt_segment_speeds/segment_speed_utils/project_vars.py +++ b/rt_segment_speeds/segment_speed_utils/project_vars.py @@ -11,7 +11,7 @@ SHARED_GCS = GTFS_DATA_DICT.gcs_paths.SHARED_GCS PUBLIC_GCS = GTFS_DATA_DICT.gcs_paths.PUBLIC_GCS -analysis_date = rt_dates.DATES["aug2024"] +analysis_date = rt_dates.DATES["sep2024"] oct2023_week = rt_dates.get_week("oct2023", exclude_wed=True) apr2023_week = rt_dates.get_week("apr2023", exclude_wed=True) From 4145dd4357535730b210615d79efc6df06427e25 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 19 Sep 2024 20:22:39 +0000 Subject: [PATCH 05/11] rt_vs_sched for sep2024 --- rt_scheduled_v_ran/logs/rt_v_scheduled_route_metrics.log | 1 + rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log | 3 +++ rt_scheduled_v_ran/scripts/update_vars.py | 3 +-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/rt_scheduled_v_ran/logs/rt_v_scheduled_route_metrics.log b/rt_scheduled_v_ran/logs/rt_v_scheduled_route_metrics.log index 34acf4234..637e03c7d 100644 --- a/rt_scheduled_v_ran/logs/rt_v_scheduled_route_metrics.log +++ b/rt_scheduled_v_ran/logs/rt_v_scheduled_route_metrics.log @@ -69,3 +69,4 @@ 2024-08-05 10:46:49.044 | INFO | __main__:route_metrics:84 - route aggregation 2024-07-17: 0:00:03.060244 2024-08-05 10:49:43.399 | INFO | __main__:route_metrics:84 - route aggregation 2024-07-17: 0:00:02.982204 2024-08-15 13:24:21.737 | INFO | __main__:route_metrics:84 - route aggregation 2024-08-14: 0:00:02.641057 +2024-09-19 13:19:02.357 | INFO | __main__:route_metrics:84 - route aggregation 2024-09-18: 0:00:02.698805 diff --git a/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log b/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log index 1032c2ed7..4cc78f5a0 100644 --- a/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log +++ b/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log @@ -450,3 +450,6 @@ 2024-08-15 12:57:49.299 | INFO | __main__:rt_schedule_trip_metrics:280 - tabular trip metrics 2024-08-14: 0:02:33.455269 2024-08-15 13:22:11.674 | INFO | __main__:rt_schedule_trip_metrics:285 - spatial trip metrics 2024-08-14: 0:24:22.374322 2024-08-15 13:23:57.590 | INFO | __main__:rt_schedule_trip_metrics:333 - Total run time for metrics on 2024-08-14: 0:28:41.746058 +2024-09-19 12:52:30.501 | INFO | __main__:rt_schedule_trip_metrics:280 - tabular trip metrics 2024-09-18: 0:02:49.593356 +2024-09-19 13:16:44.431 | INFO | __main__:rt_schedule_trip_metrics:285 - 
+2024-09-19 13:18:42.287 | INFO | __main__:rt_schedule_trip_metrics:333 - Total run time for metrics on 2024-09-18: 0:29:01.379486
diff --git a/rt_scheduled_v_ran/scripts/update_vars.py b/rt_scheduled_v_ran/scripts/update_vars.py
index a63703256..c29e2d583 100644
--- a/rt_scheduled_v_ran/scripts/update_vars.py
+++ b/rt_scheduled_v_ran/scripts/update_vars.py
@@ -5,8 +5,7 @@
 apr2023_week = rt_dates.get_week("apr2023", exclude_wed=True)
 apr2024_week = rt_dates.get_week("apr2024", exclude_wed=True)
 
-analysis_date_list = [
-    rt_dates.DATES["aug2024"]]
+analysis_date_list = [rt_dates.DATES["sep2024"]]
 
 GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

From a9f622babc7b226bca4a1b6063efecda46d53d97 Mon Sep 17 00:00:00 2001
From: tiffanychu90
Date: Thu, 19 Sep 2024 20:47:48 +0000
Subject: [PATCH 06/11] (refactor): hqta major transit stop compilation

---
 .../A1_rail_ferry_brt_stops.py | 328 +++++------
 .../check1_downloads.ipynb | 174 +---------
 2 files changed, 129 insertions(+), 373 deletions(-)

diff --git a/high_quality_transit_areas/A1_rail_ferry_brt_stops.py b/high_quality_transit_areas/A1_rail_ferry_brt_stops.py
index 9705610f9..a35aa0524 100644
--- a/high_quality_transit_areas/A1_rail_ferry_brt_stops.py
+++ b/high_quality_transit_areas/A1_rail_ferry_brt_stops.py
@@ -1,9 +1,6 @@
 """
-Download rail, ferry, BRT stops.
-Export combined rail/ferry/BRT data into GCS.
-
-Clean up the combined rail/ferry/BRT points
-and get it ready to be combined with other bus-related points.
+Assemble major transit stops for rail, BRT, and ferry
+and export to GCS.
 
 Turn rail_ferry_brt.ipynb and combine_and_visualize.ipynb
 into scripts.
@@ -19,7 +16,7 @@
 from calitp_data_analysis import utils
 from segment_speed_utils import helpers
 from segment_speed_utils.project_vars import COMPILED_CACHED_VIEWS
-from update_vars import GCS_FILE_PATH, analysis_date, TEMP_GCS
+from update_vars import GCS_FILE_PATH, analysis_date
 
 catalog = intake.open_catalog("*.yml")
@@ -81,224 +78,159 @@
     '13805', '141012',
 ]
 
-def filter_trips_to_route_type(
-    analysis_date: str,
-    route_types: list
-) -> pd.DataFrame:
+def assemble_stops(analysis_date: str) -> gpd.GeoDataFrame:
     """
-    Can use route_type_* from stops table, but since BRT needs to start
-    from trips, might as well just get it from trips.
+    Start with stop_times, attach stop geometry,
+    and also route info (route_type) from trips table.
     """
-    
-    trips = helpers.import_scheduled_trips(
+    stop_times = helpers.import_scheduled_stop_times(
         analysis_date,
-        columns = ["feed_key", "name", "trip_id",
-                   "route_id", "route_type", "route_desc"],
+        columns = ["feed_key", "schedule_gtfs_dataset_key",
+                   "stop_id", "trip_instance_key"],
+        with_direction = True,
+        get_pandas = True
     )
 
-    if isinstance(route_types, list):
-        trips_subset = trips[trips.route_type.isin(route_types)]
-
-    elif route_types == "brt":
-        trips_subset = filter_to_brt_trips(trips)
-
-    trips_subset = (trips_subset
-                    .drop(columns = "route_desc")
-                    .drop_duplicates()
-                    .reset_index(drop=True)
-                   )
-
-    return trips_subset
-
-
-def filter_to_brt_trips(trips: pd.DataFrame) -> pd.DataFrame:
-    """
-    Start with trips table and filter to specific routes that
-    are BRT
-    """
-    BRT_ROUTE_FILTERING = {
-        "Bay Area 511 AC Transit Schedule": {"route_id": ac_transit_route_id},
-        "LA Metro Bus Schedule": {"route_desc": metro_route_desc},
-        "Bay Area 511 Muni Schedule": {"route_id": muni_route_id},
-        # Omni BRT -- too infrequent!
-        #"OmniTrans Schedule": {"route_short_name": ["sbX"]}
-    }
-
-    all_brt_trips = pd.DataFrame()
-
-    for name, filtering_cond in BRT_ROUTE_FILTERING.items():
-        for col, filtering_list in filtering_cond.items():
-            trips_subset = trips[
-                (trips.name == name) &
-                (trips[col].isin(filtering_list))]
-
-            all_brt_trips = pd.concat([all_brt_trips, trips_subset], axis=0)
-
-    return all_brt_trips
-
-
-def filter_unique_stops_for_trips(
-    analysis_date: str,
-    trip_df: pd.DataFrame
-) -> gpd.GeoDataFrame:
-    """
-    Start with all operators' stop_times, and narrow down to the trip_ids
-    present for the route_type and keep the unique stops.
-
-    Then attach the stop's point geometry.
-    """
-    stop_times = helpers.import_scheduled_stop_times(
+    trips = helpers.import_scheduled_trips(
         analysis_date,
-        with_direction = False,
+        columns = [
+            "name",
+            "trip_instance_key",
+            "route_id", "route_type", "route_desc"
+        ],
         get_pandas = True
     )
-
-    keep_stop_cols = [
-        "feed_key", "name",
-        "stop_id",
-        "route_id", "route_type",
-        # let's keep route_id, since we double check in a notebook
-    ]
-
-    stops_for_trips = pd.merge(
+
+    stops_with_route = pd.merge(
         stop_times,
-        trip_df,
-        on = ["feed_key", "trip_id"],
+        trips,
+        on = "trip_instance_key",
        how = "inner"
-    )[keep_stop_cols].drop_duplicates().reset_index(drop=True)
+    ).drop(
+        columns = "trip_instance_key"
+    ).drop_duplicates().reset_index(drop=True)
 
     # Attach stop geometry
     stops = helpers.import_scheduled_stops(
         analysis_date,
+        columns = ["feed_key", "stop_id", "stop_name", "geometry"],
+        get_pandas = True
     )
 
     stops_with_geom = pd.merge(
         stops,
-        stops_for_trips,
+        stops_with_route,
         on = ["feed_key", "stop_id"],
        how = "inner"
-    )[keep_stop_cols + ["stop_name", "geometry"]]
+    )
 
     return stops_with_geom
-
-def grab_rail_data(analysis_date: str):
+
+def grab_rail_stops(
+    gdf: gpd.GeoDataFrame,
+    route_types: list = ['0', '1', '2']
+) -> gpd.GeoDataFrame:
     """
     Grab all the rail stops.
+    """
+    return gdf[
+        gdf.route_type.isin(route_types)
+    ].reset_index(drop=True).assign(hqta_type = "major_stop_rail")
+
+
+def grab_ferry_stops(
+    gdf: gpd.GeoDataFrame,
+    route_types: list = ['4']
+) -> gpd.GeoDataFrame:
     """
-    rail_route_types = ['0', '1', '2']
-
-    rail_trips = filter_trips_to_route_type(analysis_date, rail_route_types)
-    rail_stops = filter_unique_stops_for_trips(analysis_date, rail_trips)
-
-    utils.geoparquet_gcs_export(
-        rail_stops,
-        TEMP_GCS,
-        "rail_stops"
-    )
-
-
-def grab_brt_data(analysis_date: str):
-    """
-    Grab BRT routes, stops data for certain operators in CA by analysis date.
-    """
-
-    brt_trips = filter_trips_to_route_type(analysis_date, "brt")
-    brt_stops = filter_unique_stops_for_trips(analysis_date, brt_trips)
-
-    utils.geoparquet_gcs_export(
-        brt_stops,
-        TEMP_GCS,
-        "brt_stops"
-    )
+    Grab all the ferry stops.
+    """
+    # only stops without bus service
+    angel_and_alcatraz = ['2483552', '2483550', '43002']
+    return gdf[
+        (gdf.route_type.isin(route_types)) &
+        ~(gdf.stop_id.isin(angel_and_alcatraz))
+    ].reset_index(drop=True).assign(hqta_type = "major_stop_ferry")
+
 
-def additional_brt_filtering_out_stops(
-    df: gpd.GeoDataFrame,
+def grab_brt_stops(
+    gdf: gpd.GeoDataFrame,
+    route_types: list = ["3"]
 ) -> gpd.GeoDataFrame:
     """
-    df: geopandas.GeoDataFrame
-        Input BRT stops data (combined across operators)
+    Start with the stops that has route information
+    and start filtering based on operator name, route_id / route_desc,
+    and stop_ids to include or exclude.
+
+    The stop id lists were manually provided (by Muni) and/or verified by us.
     """
     metro_name = "LA Metro Bus Schedule"
     muni_name = "Bay Area 511 Muni Schedule"
+    ac_transit_name = "Bay Area 511 AC Transit Schedule"
+    # Omni BRT -- too infrequent! "route_short_name": ["sbX"]
 
-    muni = df[df.name == muni_name].query(
-        'stop_id in @muni_brt_include'
-    )
-
-    # For Metro, unable to filter out non-station stops using GTFS, manual list
-    metro = df[df.name == metro_name].query(
-        'stop_id not in @metro_j_exclude')
-
-    muni_metro = pd.concat([muni, metro], axis=0)
+    BRT_ROUTE_FILTERING = {
+        "Bay Area 511 AC Transit Schedule": {"route_id": ac_transit_route_id},
+        "LA Metro Bus Schedule": {"route_desc": metro_route_desc},
+    }
 
-    other_operators = df[~df.name.isin([metro_name, muni_name])]
-
-    brt_df_stops = pd.concat(
-        [muni_metro, other_operators], axis=0
-    ).sort_values(["feed_key", "name"]).reset_index(drop=True)
+    brt_operator_stops = gdf[
+        (gdf.route_type.isin(route_types)) &
+        (gdf.name.isin([metro_name, muni_name, ac_transit_name]))
+    ]
 
-    return brt_df_stops
-
-
-def grab_ferry_data(analysis_date: str):
-    """
-    Grab all the ferry stops.
-    """
-    ferry_route_types = ['4']
-
-    ferry_trips = filter_trips_to_route_type(analysis_date, ferry_route_types)
-    ferry_stops = filter_unique_stops_for_trips(analysis_date, ferry_trips)
-
-    # only stops without bus service
-    angel_and_alcatraz = ['2483552', '2483550', '43002']
+    muni_brt = brt_operator_stops[
+        (brt_operator_stops.name == muni_name) &
+        (brt_operator_stops.route_id.isin(muni_route_id)) &
+        (brt_operator_stops.stop_id.isin(muni_brt_include))
+    ]
 
-    ferry_stops = ferry_stops[
-        ~ferry_stops.stop_id.isin(angel_and_alcatraz)
-    ].reset_index(drop=True)
+    # For Metro, unable to filter out non-station stops using GTFS, manual list
+    metro_brt = brt_operator_stops[
+        (brt_operator_stops.name == metro_name) &
+        (brt_operator_stops.route_desc.isin(metro_route_desc)) &
+        ~(brt_operator_stops.stop_id.isin(metro_j_exclude))
+    ]
 
-    utils.geoparquet_gcs_export(
-        ferry_stops,
-        TEMP_GCS,
-        "ferry_stops"
-    )
-
+    ac_transit_brt = brt_operator_stops[
+        (brt_operator_stops.name == ac_transit_name) &
+        (brt_operator_stops.route_id.isin(ac_transit_route_id))
+    ]
+
+    brt_stops = pd.concat(
+        [muni_brt, metro_brt, ac_transit_brt], axis=0
+    ).reset_index(drop=True).assign(hqta_type = "major_stop_brt")
 
-def clip_to_ca(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
-    """
-    Clip to CA boundaries.
-    """
-    ca = catalog.ca_boundary.read().to_crs(gdf.crs)
+    return brt_stops
 
-    gdf2 = gdf.clip(ca, keep_geom_type = False).reset_index(drop=True)
 
-    return gdf2
-
-
-def get_rail_ferry_brt_extract() -> gpd.GeoDataFrame:
+def compile_rail_ferry_brt_stops(
+    list_of_files: list
+    ) -> gpd.GeoDataFrame:
     """
     Prepare the rail / ferry / BRT stops to be assembled with
     the bus_hqta types and saved into the hqta_points file.
     """
-    df = catalog.rail_brt_ferry_initial.read()
-
-    keep_cols = ["feed_key", "name", "stop_id",
-                 "route_type", "geometry"]
+    df = pd.concat(
+        list_of_files,
+        axis=0, ignore_index=True
    )
 
-    rail_types = ["0", "1", "2"]
-    bus_types = ["3"]
-    ferry_types = ["4"]
+    keep_cols = [
+        "schedule_gtfs_dataset_key", "feed_key",
+        "stop_id", "stop_name",
+        "route_id", "route_type",
+        "hqta_type", "geometry"
+    ]
 
-    df2 = (df[keep_cols].assign(
-        hqta_type = df.route_type.map(
-            lambda x: "major_stop_rail" if x in rail_types
-            else "major_stop_brt" if x in bus_types
-            else "major_stop_ferry" if x in ferry_types
-            else "missing" # add flag to make it easier to check results
-        )
-    ).rename(columns = {"feed_key": "feed_key_primary"})
-     .drop(columns = ["route_type", "name"])
+    df2 = (df[keep_cols]
+           .sort_values(["feed_key", "stop_id"]).reset_index(drop=True)
+           .rename(columns = {
+               "feed_key": "feed_key_primary",
+               "schedule_gtfs_dataset_key": "schedule_gtfs_dataset_key_primary"
+           })
    )
 
     return df2
@@ -317,39 +249,25 @@
 
     start = datetime.datetime.now()
 
-    # Rail
-    grab_rail_data(analysis_date)
-    rail_stops = gpd.read_parquet(f"{TEMP_GCS}rail_stops.parquet")
-
-    # BRT
-    grab_brt_data(analysis_date)
-    brt_stops = gpd.read_parquet(f"{TEMP_GCS}brt_stops.parquet")
-    brt_stops = additional_brt_filtering_out_stops(
-        brt_stops)
+    stops_route_gdf = assemble_stops(analysis_date)
 
-    # Ferry
-    grab_ferry_data(analysis_date)
-    ferry_stops = gpd.read_parquet(f"{TEMP_GCS}ferry_stops.parquet")
-
-    # Concatenate datasets that need to be clipped to CA
-    rail_brt = pd.concat([
-        rail_stops,
-        brt_stops
-    ], axis=0, ignore_index= True).pipe(clip_to_ca)
-
-    # Concatenate all together
-    rail_brt_ferry = pd.concat([
-        rail_brt,
-        ferry_stops
-    ], axis=0, ignore_index=True)
+    rail_stops = grab_rail_stops(stops_route_gdf)
+    ferry_stops = grab_ferry_stops(stops_route_gdf)
+    brt_stops = grab_brt_stops(stops_route_gdf)
+
+    major_transit_stops = compile_rail_ferry_brt_stops(
+        [rail_stops, ferry_stops, brt_stops]
+    )
 
-    # Export to GCS
     utils.geoparquet_gcs_export(
-        rail_brt_ferry,
+        major_transit_stops,
         GCS_FILE_PATH,
         "rail_brt_ferry"
     )
 
     end = datetime.datetime.now()
-    logger.info(f"A1_rail_ferry_brt_stops {analysis_date} "
-                f"execution time: {end - start}")
\ No newline at end of file
+
+    logger.info(
+        f"A1_rail_ferry_brt_stops {analysis_date} "
+        f"execution time: {end - start}"
+    )
\ No newline at end of file
diff --git a/high_quality_transit_areas/check1_downloads.ipynb b/high_quality_transit_areas/check1_downloads.ipynb
index 0abcf7d82..b12fc7687 100644
--- a/high_quality_transit_areas/check1_downloads.ipynb
+++ b/high_quality_transit_areas/check1_downloads.ipynb
@@ -23,7 +23,7 @@
    "from IPython.display import Markdown\n",
    "\n",
    "from segment_speed_utils import helpers\n",
-   "from update_vars import analysis_date, TEMP_GCS\n",
+   "from update_vars import analysis_date, GCS_FILE_PATH\n",
    "\n",
    "# Map arguments\n",
    "TILES = \"Carto DB Positron\""
@@ -47,167 +47,6 @@
    "    display(m)"
   ]
  },
- {
-  "cell_type": "markdown",
-  "id": "c27ae680-8516-46f9-98be-92ce69a20007",
-  "metadata": {},
-  "source": [
-   "## After `A1_download_rail_ferry_brt`\n",
-   "\n",
-   "* There are some stops to remove. \n",
-   "* Once finalized, can run `A2_combine_stops`"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "22f1a4b9-6846-4d48-a4ba-70fa4006f155",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "import A1_rail_ferry_brt_stops as rail_ferry_brt"
-  ]
- },
- {
-  "cell_type": "markdown",
-  "id": "99d703a2-80fd-4ade-bc45-b6d36113892c",
-  "metadata": {},
-  "source": [
-   "### LA Metro (182)"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "a5cf0189-b403-48b0-a0ab-12d3ae2ea91f",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "brt_stops = gpd.read_parquet(f\"{TEMP_GCS}brt_stops.parquet\")"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "9bd567f3-f31d-4455-b4d3-b18369b40b0c",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "brt_stops_filtered = rail_ferry_brt.additional_brt_filtering_out_stops(\n",
-   "    brt_stops\n",
-   ")"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "eafb7cf4-f20e-4409-a55d-eac9c9fb9b21",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "name = \"LA Metro Bus Schedule\"\n",
-   "make_map(brt_stops[brt_stops.name==name], \"route_id\")"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "7948a5cc-d0fa-4979-9747-473912fc4e55",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "make_map(brt_stops_filtered[brt_stops_filtered.name==name], \n",
-   "         \"route_id\")"
-  ]
- },
- {
-  "cell_type": "markdown",
-  "id": "1c53ad6b-baf6-4b92-9065-e3d91ac91d93",
-  "metadata": {},
-  "source": [
-   "### SF Muni (282)"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "1a3829f0-62cf-4383-894d-f01188638157",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "name = \"Bay Area 511 Muni Schedule\"\n",
-   "make_map(brt_stops[brt_stops.name==name], \"route_id\")"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "d4b93ddd-1b85-4cbe-a2a1-c894db5d4017",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "make_map(brt_stops_filtered[brt_stops_filtered.name==name], \n",
-   "         \"route_id\")"
-  ]
- },
- {
-  "cell_type": "markdown",
-  "id": "635410b7-55e6-4025-a30c-539dee55a7d6",
-  "metadata": {},
-  "source": [
-   "### AC Transit (4)"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "7a88fb7d-bda2-4b79-9fbf-d39e188f8352",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "name = \"Bay Area 511 AC Transit Schedule\"\n",
-   "make_map(brt_stops[brt_stops.name==name], \"route_id\")"
-  ]
- },
- {
-  "cell_type": "markdown",
-  "id": "c0ca6814-991a-4689-9de8-c0a1911bc9e2",
-  "metadata": {},
-  "source": [
-   "## After `A3_rail_ferry_brt_extract`\n",
-   "\n",
-   "#### BRT Service likely meeting [PRC 21060.2](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?lawCode=PRC&sectionNum=21060.2.&highlight=true&keyword=bus%20rapid%20transit) definition:\n",
-   "\n",
-   "* LA Metro Orange, Silver excluding street running (stop flags only)\n",
-   "* ~~Omnitrans sbX, all stops (curbside stations are well-defined, with fare prepayment)~~\n",
-   "    * insufficient frequency 5/16\n",
-   "* AC Transit Tempo, all stops (curbside stations are well-defined, with fare prepayment)"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "4ddca815-b484-45eb-bdaf-5f30236038d3",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "stops = rail_ferry_brt.get_rail_ferry_brt_extract()"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "0f828224-b88b-4505-95af-a99634610ed2",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "operators = helpers.import_scheduled_trips(\n",
-   "    analysis_date,\n",
-   "    columns = [\"feed_key\", \"name\"],\n",
-   "    get_pandas = True\n",
-   ").rename(columns = {\"feed_key\": \"feed_key_primary\"})"
-  ]
- },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
+   "stops = gpd.read_parquet(\n",
+   "    f\"{GCS_FILE_PATH}rail_brt_ferry.parquet\"\n",
+   ")\n",
+   "\n",
    "hqta_types = list(stops.hqta_type.unique())"
   ]
  },
   "outputs": [],
   "source": [
    "for i in hqta_types:\n",
-   "    subset = stops[stops.hqta_type==i].merge(\n",
-   "        operators,\n",
-   "        on = \"feed_key_primary\"\n",
-   "    )\n",
-   "    \n",
    "    display(Markdown(f\"### HQTA Type: {i}\"))\n",
    "    \n",
-   "    make_map(subset, \"name\")"
+   "    make_map(stops[stops.hqta_type==i], \"route_id\")"
   ]
  },
 {

From c55a6ba55832ab939b4d801f3d9ec5d98f7cfc1d Mon Sep 17 00:00:00 2001
From: tiffanychu90
Date: Wed, 25 Sep 2024 17:33:00 +0000
Subject: [PATCH 07/11] (refactor): use schedule_gtfs_dataset_key instead of
 feed_key, simplify functions, separate out where assumptions might change

---
 .../A1_rail_ferry_brt_stops.py | 19 +-
 .../B1_create_hqta_segments.py | 5 +-
 .../B2_sjoin_stops_to_segments.py | 46 ++-
 .../C1_prep_pairwise_intersections.py | 10 +-
 .../C2_get_intersections.py | 15 +-
 .../C3_create_bus_hqta_types.py | 84 ++---
 .../D1_assemble_hqta_points.py | 293 +++++++-----------
 .../D2_assemble_hqta_polygons.py | 124 ++++----
 high_quality_transit_areas/catalog.yml | 10 +-
 .../logs/hqta_processing.log | 15 +
 high_quality_transit_areas/update_vars.py | 5 +-
 11 files changed, 299 insertions(+), 327 deletions(-)

diff --git a/high_quality_transit_areas/A1_rail_ferry_brt_stops.py b/high_quality_transit_areas/A1_rail_ferry_brt_stops.py
index a35aa0524..7dcb87eef 100644
--- a/high_quality_transit_areas/A1_rail_ferry_brt_stops.py
+++ b/high_quality_transit_areas/A1_rail_ferry_brt_stops.py
@@ -13,6 +13,7 @@
 
 from loguru import logger
 
+import _utils
 from calitp_data_analysis import utils
 from segment_speed_utils import helpers
 from segment_speed_utils.project_vars import COMPILED_CACHED_VIEWS
 from update_vars import GCS_FILE_PATH, analysis_date
@@ -219,19 +220,16 @@
     )
 
     keep_cols = [
-        "schedule_gtfs_dataset_key", "feed_key",
+        "schedule_gtfs_dataset_key",
         "stop_id", "stop_name",
         "route_id", "route_type",
         "hqta_type", "geometry"
     ]
 
     df2 = (df[keep_cols]
-           .sort_values(["feed_key", "stop_id"]).reset_index(drop=True)
-           .rename(columns = {
-               "feed_key": "feed_key_primary",
-               "schedule_gtfs_dataset_key": "schedule_gtfs_dataset_key_primary"
-           })
-          )
+           .sort_values(["schedule_gtfs_dataset_key", "stop_id"]).reset_index(drop=True)
+           .pipe(_utils.primary_rename)
+          )
 
     return df2
@@ -249,6 +249,13 @@
 
     stops_route_gdf = assemble_stops(analysis_date)
 
+    # let's save this to use as a crosswalk to fill in info
+    utils.geoparquet_gcs_export(
+        stops_route_gdf,
+        GCS_FILE_PATH,
+        "stops_to_route"
+    )
+
     rail_stops = grab_rail_stops(stops_route_gdf)
     ferry_stops = grab_ferry_stops(stops_route_gdf)
     brt_stops = grab_brt_stops(stops_route_gdf)
diff --git a/high_quality_transit_areas/B1_create_hqta_segments.py b/high_quality_transit_areas/B1_create_hqta_segments.py
index b446f4133..552573499 100644
--- a/high_quality_transit_areas/B1_create_hqta_segments.py
+++ b/high_quality_transit_areas/B1_create_hqta_segments.py
@@ -114,8 +114,7 @@ def select_shapes_and_segment(
     ).query(
         'shape_array_key not in @outside_amtrak_shapes'
     ).drop(
-        columns = ["schedule_gtfs_dataset_key",
-                   "shape_array_key", "route_length"]
+        columns = ["feed_key", "shape_array_key", "route_length"]
["feed_key", "shape_array_key", "route_length"] ).fillna({"direction_id": 0}).astype({"direction_id": "int"}) routes_both_dir = (gdf.route_key @@ -147,7 +146,7 @@ def select_shapes_and_segment( segment_col = "segment_geometry" ) - route_cols = ["feed_key", "route_id", "route_key"] + route_cols = ["schedule_gtfs_dataset_key", "route_id", "route_key"] # Attach other route info hqta_segments = pd.merge( diff --git a/high_quality_transit_areas/B2_sjoin_stops_to_segments.py b/high_quality_transit_areas/B2_sjoin_stops_to_segments.py index 8a61cb7b8..085e170c4 100644 --- a/high_quality_transit_areas/B2_sjoin_stops_to_segments.py +++ b/high_quality_transit_areas/B2_sjoin_stops_to_segments.py @@ -7,7 +7,6 @@ """ import datetime import geopandas as gpd -import numpy as np import pandas as pd import sys @@ -15,36 +14,49 @@ from calitp_data_analysis import utils from segment_speed_utils import helpers, gtfs_schedule_wrangling -from update_vars import GCS_FILE_PATH, analysis_date, PROJECT_CRS +from update_vars import GCS_FILE_PATH, analysis_date, PROJECT_CRS, SEGMENT_BUFFER_METERS -def max_trips_by_group(df: pd.DataFrame, - group_cols: list, - max_col: str = "n_trips" - ) -> pd.DataFrame: +def max_trips_by_group( + df: pd.DataFrame, + group_cols: list, + max_col: str = "n_trips" +) -> pd.DataFrame: """ Find the max trips, by stop_id or by hqta_segment_id. Put in a list of group_cols to find the max. Can also subset for AM or PM by df[df.departure_hour < 12] """ df2 = (df.groupby(group_cols) - .agg({max_col: np.max}) + .agg({max_col: "max"}) .reset_index() ) return df2 -def stop_times_aggregation_max_by_stop(stop_times: pd.DataFrame) -> pd.DataFrame: +def stop_times_aggregation_max_by_stop( + stop_times: pd.DataFrame, + analysis_date: str +) -> pd.DataFrame: """ Take the stop_times table and group by stop_id-departure hour and count how many trips occur. """ - stop_cols = ["feed_key", "stop_id"] + stop_cols = ["schedule_gtfs_dataset_key", "stop_id"] + gtfs_key = helpers.import_scheduled_trips( + analysis_date, + columns = ["feed_key", "gtfs_dataset_key"], + get_pandas = True + ) + stop_times = stop_times.assign( departure_hour = pd.to_datetime( stop_times.departure_sec, unit="s").dt.hour + ).merge( + gtfs_key, + on = "feed_key" ) # Aggregate how many trips are made at that stop by departure hour @@ -124,7 +136,7 @@ def hqta_segment_keep_one_stop( Returns gdf where each segment only appears once. 
""" - stop_cols = ["feed_key", "stop_id"] + stop_cols = ["schedule_gtfs_dataset_key", "stop_id"] segment_to_stop_times = pd.merge( hqta_segments, @@ -168,7 +180,7 @@ def sjoin_stops_and_stop_times_to_hqta_segments( hqta_segments: gpd.GeoDataFrame, stops: gpd.GeoDataFrame, stop_times: pd.DataFrame, - buffer_size: int = 50, + buffer_size: int, hq_transit_threshold: int = 4, ) -> gpd.GeoDataFrame: """ @@ -200,7 +212,6 @@ def sjoin_stops_and_stop_times_to_hqta_segments( (x.pm_max_trips >= hq_transit_threshold)) else False, axis=1) ).drop(columns = drop_cols) - return segment_hq_corr @@ -221,8 +232,8 @@ def sjoin_stops_and_stop_times_to_hqta_segments( # takes 1 min max_arrivals_by_stop = helpers.import_scheduled_stop_times( analysis_date, - get_pandas = True - ).pipe(stop_times_aggregation_max_by_stop) + get_pandas = True, + ).pipe(stop_times_aggregation_max_by_stop, analysis_date) max_arrivals_by_stop.to_parquet( f"{GCS_FILE_PATH}max_arrivals_by_stop.parquet") @@ -242,7 +253,7 @@ def sjoin_stops_and_stop_times_to_hqta_segments( hqta_segments, stops, max_arrivals_by_stop, - buffer_size = 50, #50meters + buffer_size = SEGMENT_BUFFER_METERS, #50meters hq_transit_threshold = 4 ) @@ -253,7 +264,8 @@ def sjoin_stops_and_stop_times_to_hqta_segments( ) end = datetime.datetime.now() - logger.info(f"B2_sjoin_stops_to_segments {analysis_date} " - f"execution time: {end - start}") + logger.info( + f"B2_sjoin_stops_to_segments {analysis_date} " + f"execution time: {end - start}") #client.close() diff --git a/high_quality_transit_areas/C1_prep_pairwise_intersections.py b/high_quality_transit_areas/C1_prep_pairwise_intersections.py index 9e708b596..12d9ccb4f 100644 --- a/high_quality_transit_areas/C1_prep_pairwise_intersections.py +++ b/high_quality_transit_areas/C1_prep_pairwise_intersections.py @@ -35,8 +35,6 @@ def prep_bus_corridors(is_hq_corr: bool) -> gpd.GeoDataFrame: return bus_hqtc - - def sjoin_against_other_operators( in_group_df: gpd.GeoDataFrame, out_group_df: gpd.GeoDataFrame @@ -105,7 +103,7 @@ def pairwise_intersections( (corridors_gdf.hqta_segment_id.isin(segments_p2))] .drop_duplicates() .sort_values( - ["feed_key", "route_id", "hqta_segment_id"], + ["schedule_gtfs_dataset_key", "route_id", "hqta_segment_id"], ascending = [True, True, True]) .reset_index(drop=True) ) @@ -139,7 +137,9 @@ def pairwise_intersections( pairwise_intersections(corridors) end = datetime.datetime.now() - logger.info(f"C1_prep_pairwise_intersections {analysis_date} " - f"execution time: {end - start}") + logger.info( + f"C1_prep_pairwise_intersections {analysis_date} " + f"execution time: {end - start}" + ) #client.close() \ No newline at end of file diff --git a/high_quality_transit_areas/C2_get_intersections.py b/high_quality_transit_areas/C2_get_intersections.py index 30dbcb834..85ed56c20 100644 --- a/high_quality_transit_areas/C2_get_intersections.py +++ b/high_quality_transit_areas/C2_get_intersections.py @@ -12,7 +12,6 @@ import datetime import geopandas as gpd import intake -import os import pandas as pd import sys @@ -39,10 +38,10 @@ def attach_geometry_to_pairs( } - col_order = ["feed_key"] + segment_cols + list(rename_cols.values()) + col_order = ["schedule_gtfs_dataset_key"] + segment_cols + list(rename_cols.values()) pairs_with_geom1 = pd.merge( - corridors[["feed_key"] + segment_cols], + corridors[["schedule_gtfs_dataset_key"] + segment_cols], intersecting_pairs, on = "hqta_segment_id", how = "inner" @@ -57,7 +56,7 @@ def attach_geometry_to_pairs( ) gdf = (pairs_with_geom2.reindex(columns = col_order) - 
.sort_values(["feed_key", "hqta_segment_id", + .sort_values(["schedule_gtfs_dataset_key", "hqta_segment_id", "intersect_hqta_segment_id"]) .reset_index(drop=True) ) @@ -90,7 +89,7 @@ def find_intersections(pairs_table: gpd.GeoDataFrame) -> gpd.GeoDataFrame: # Concatenate and add this column to pairs_table, join by index gdf = pd.concat([ results_df, - pairs_table[["feed_key", "hqta_segment_id"]], + pairs_table[["schedule_gtfs_dataset_key", "hqta_segment_id"]], ], axis=1) return gdf @@ -122,7 +121,9 @@ def find_intersections(pairs_table: gpd.GeoDataFrame) -> gpd.GeoDataFrame: ) end = datetime.datetime.now() - logger.info(f"C2_find_intersections {analysis_date} " - f"execution time: {end - start}") + logger.info( + f"C2_find_intersections {analysis_date} " + f"execution time: {end - start}" + ) #client.close() \ No newline at end of file diff --git a/high_quality_transit_areas/C3_create_bus_hqta_types.py b/high_quality_transit_areas/C3_create_bus_hqta_types.py index bed753ffb..1f0574e41 100644 --- a/high_quality_transit_areas/C3_create_bus_hqta_types.py +++ b/high_quality_transit_areas/C3_create_bus_hqta_types.py @@ -16,15 +16,15 @@ from loguru import logger +import _utils import C1_prep_pairwise_intersections as prep_clip from calitp_data_analysis import utils from segment_speed_utils import helpers from update_vars import (GCS_FILE_PATH, analysis_date, - PROJECT_CRS, BUFFER_METERS + PROJECT_CRS, SEGMENT_BUFFER_METERS ) - def buffer_around_intersections(buffer_size: int) -> gpd.GeoDataFrame: """ Draw 50 m buffers around intersections to better catch stops @@ -55,28 +55,29 @@ def create_major_stop_bus( """ # Narrow down all stops to only include stops from operators # that also have some bus corridor intersection result - included_operators = bus_intersections.feed_key.unique() - major_stops = all_stops[all_stops.feed_key.isin(included_operators)] + included_operators = bus_intersections.schedule_gtfs_dataset_key.unique() + major_stops = all_stops[ + all_stops.schedule_gtfs_dataset_key.isin(included_operators) + ] major_bus_stops_in_intersections = ( gpd.sjoin( major_stops, - bus_intersections[["feed_key", "geometry"]], + bus_intersections[["schedule_gtfs_dataset_key", "geometry"]], how = "inner", - predicate = "within" - ).drop(columns = "index_right") - .drop_duplicates( - subset=["feed_key_left", "stop_id", "feed_key_right"]) + predicate = "within", + lsuffix="primary", rsuffix="secondary" + ).drop_duplicates( + subset=[ + "schedule_gtfs_dataset_key_primary", "stop_id", + "schedule_gtfs_dataset_key_secondary"]) ).reset_index(drop=True) stops_in_intersection = ( major_bus_stops_in_intersections.assign( hqta_type = "major_stop_bus", - ).rename(columns = - {"feed_key_left": "feed_key_primary", - "feed_key_right": "feed_key_secondary", - }) - [["feed_key_primary", "feed_key_secondary", + )[["schedule_gtfs_dataset_key_primary", + "schedule_gtfs_dataset_key_secondary", "stop_id", "geometry", "hqta_type"]] ) @@ -96,23 +97,24 @@ def create_stops_along_corridors(all_stops: gpd.GeoDataFrame) -> gpd.GeoDataFram [["hqta_segment_id", "geometry"]] ) - stop_cols = ["feed_key", "stop_id"] - - stops_in_hq_corr = (gpd.sjoin( - all_stops, - bus_corridors[["geometry"]], - how = "inner", - predicate = "intersects" - ).drop(columns = "index_right") - .drop_duplicates(subset=stop_cols) - .reset_index(drop=True) - ) - - stops_in_hq_corr2 = (stops_in_hq_corr.assign( - hqta_type = "hq_corridor_bus", - )[stop_cols + ["hqta_type", "geometry"]] - .rename(columns = {"feed_key": "feed_key_primary"}) - ) + stop_cols = 
["schedule_gtfs_dataset_key", "stop_id"] + + stops_in_hq_corr = ( + gpd.sjoin( + all_stops, + bus_corridors[["geometry"]], + how = "inner", + predicate = "intersects" + ).drop_duplicates(subset=stop_cols) + .reset_index(drop=True) + ) + + stops_in_hq_corr2 = ( + stops_in_hq_corr.assign( + hqta_type = "hq_corridor_bus", + )[stop_cols + ["hqta_type", "geometry"]] + .pipe(_utils.primary_rename) + ) return stops_in_hq_corr2 @@ -131,14 +133,24 @@ def create_stops_along_corridors(all_stops: gpd.GeoDataFrame) -> gpd.GeoDataFram # Start with the gdf of all the hqta_segments # that have a sjoin with an orthogonal route - bus_intersections = buffer_around_intersections(BUFFER_METERS) + bus_intersections = buffer_around_intersections(SEGMENT_BUFFER_METERS) # Grab point geom with all stops + gtfs_keys = helpers.import_scheduled_trips( + analysis_date, + columns = ["feed_key", "gtfs_dataset_key"], + get_pandas=True + ) + all_stops = helpers.import_scheduled_stops( analysis_date, get_pandas = True, + columns = ["feed_key", "stop_id", "geometry"], crs = PROJECT_CRS - ) + ).merge( + gtfs_keys, + on = "feed_key", + ).drop(columns = "feed_key") # Create hqta_type == major_stop_bus major_stop_bus = create_major_stop_bus(all_stops, bus_intersections) @@ -160,7 +172,9 @@ def create_stops_along_corridors(all_stops: gpd.GeoDataFrame) -> gpd.GeoDataFram ) end = datetime.datetime.now() - logger.info(f"C3_create_bus_hqta_types {analysis_date} " - f"execution time: {end - start}") + logger.info( + f"C3_create_bus_hqta_types {analysis_date} " + f"execution time: {end - start}" + ) #client.close() \ No newline at end of file diff --git a/high_quality_transit_areas/D1_assemble_hqta_points.py b/high_quality_transit_areas/D1_assemble_hqta_points.py index 4226bedb5..4441b38c7 100644 --- a/high_quality_transit_areas/D1_assemble_hqta_points.py +++ b/high_quality_transit_areas/D1_assemble_hqta_points.py @@ -17,198 +17,123 @@ from loguru import logger -from A1_rail_ferry_brt_stops import clip_to_ca, get_rail_ferry_brt_extract +import _utils from calitp_data_analysis import geography_utils, utils from segment_speed_utils import helpers from shared_utils import gtfs_utils_v2 -from update_vars import analysis_date, GCS_FILE_PATH, PROJECT_CRS +from update_vars import analysis_date, GCS_FILE_PATH, PROJECT_CRS, EXPORT_PATH catalog = intake.open_catalog("*.yml") -EXPORT_PATH = f"{GCS_FILE_PATH}export/{analysis_date}/" -def hqta_details(row) -> str: +def combine_stops_by_hq_types(crs: str) -> gpd.GeoDataFrame: """ - Add HQTA details of why nulls are present - based on feedback from open data users. - """ - if row.hqta_type == "major_stop_bus": - if row.feed_key_primary != row.feed_key_secondary: - return "intersection_2_bus_routes_different_operators" - else: - return "intersection_2_bus_routes_same_operator" - elif row.hqta_type == "hq_corridor_bus": - return "stop_along_hq_bus_corridor_single_operator" - elif row.hqta_type in ["major_stop_ferry", - "major_stop_brt", "major_stop_rail"]: - # (not sure if ferry, brt, rail, primary/secondary ids are filled in.) - return row.hqta_type + "_single_operator" - - -def merge_in_max_arrivals_by_stop( - hqta_points: gpd.GeoDataFrame, - max_arrivals: pd.DataFrame -) -> gpd.GeoDataFrame: - """ - Merge combined hqta points across all categories with + Concatenate combined hqta points across all categories then merge in the maximum arrivals for each stop (keep if it shows up in hqta_points) with left merge. 
- """ - with_stops = pd.merge( - hqta_points, - max_arrivals.rename(columns = {"feed_key": "feed_key_primary"}), - on = ["feed_key_primary", "stop_id"], - how = "left" - ) + """ + rail_ferry_brt = catalog.rail_brt_ferry_stops.read().to_crs( + crs) + major_stop_bus = catalog.major_stop_bus.read().to_crs(crs) + stops_in_corridor = catalog.stops_in_hq_corr.read().to_crs(crs) - # Combine AM max and PM max into 1 column trip_count_cols = ["am_max_trips", "pm_max_trips"] - - with_stops2 = with_stops.assign( - peak_trips = (with_stops[trip_count_cols] - .min(axis=1) - .fillna(0).astype(int)) + + max_arrivals = pd.read_parquet( + f"{GCS_FILE_PATH}max_arrivals_by_stop.parquet", + columns = ["schedule_gtfs_dataset_key", + "stop_id"] + trip_count_cols + ).pipe(_utils.primary_rename) + + # Combine AM max and PM max into 1 column + # if am_max_trips = 4 and pm_max_trips = 5, we'll choose 4. + max_arrivals = max_arrivals.assign( + peak_trips = max_arrivals[trip_count_cols].min(axis=1) ).drop(columns = trip_count_cols) - return with_stops2 - + hqta_points_combined = pd.concat([ + major_stop_bus, + stops_in_corridor, + rail_ferry_brt, + ], axis=0) -def add_route_info(hqta_points: gpd.GeoDataFrame) -> gpd.GeoDataFrame: - """ - Use feed_key-stop_id to add route_id back in, - using the trips and stop_times table. - """ - stop_times = helpers.import_scheduled_stop_times( - analysis_date, - columns = ["feed_key", "stop_id", "trip_id"], - get_pandas = True, - with_direction = False - ) - - trips = helpers.import_scheduled_trips( - analysis_date, - columns = ["feed_key", "gtfs_dataset_key", "trip_id", "route_id"], - get_pandas = True - ) + # Merge in max arrivals + with_stops = pd.merge( + hqta_points_combined, + max_arrivals, + on = ["schedule_gtfs_dataset_key_primary", "stop_id"], + how = "left" + ).fillna({"peak_trips": 0}).astype({"peak_trips": "int"}) - stop_cols = ["feed_key", "stop_id"] - trip_cols = ["feed_key", "trip_id"] + keep_stop_cols = [ + "schedule_gtfs_dataset_key_primary", "schedule_gtfs_dataset_key_secondary", + "stop_id", "geometry", + "hqta_type", "peak_trips", "hqta_details" + ] - one_trip = (stop_times[stop_cols + ["trip_id"]] - .drop_duplicates(subset=stop_cols) - .reset_index(drop=True) - ) + with_stops = with_stops.assign( + hqta_details = with_stops.apply(_utils.add_hqta_details, axis=1) + )[keep_stop_cols] - with_route_info = pd.merge( - one_trip, - trips[trip_cols + [ - "schedule_gtfs_dataset_key", "route_id" - ]].drop_duplicates(), - on = trip_cols, - how = "inner", - validate = "m:1" # one_trip has many stops for that trip - ).rename(columns = {"feed_key": "feed_key_primary"}) + return with_stops - hqta_points_with_route = pd.merge( - hqta_points, - with_route_info, - on = ["feed_key_primary", "stop_id"], - how = "inner", - validate = "m:1" - ).drop(columns = "trip_id") - - # Clip to CA -- remove ferry or else we're losing it in the clip - not_ferry = hqta_points_with_route[ - hqta_points_with_route.hqta_type != "major_stop_ferry" - ].pipe(clip_to_ca) - - is_ferry = hqta_points_with_route[ - hqta_points_with_route.hqta_type == "major_stop_ferry"] - - ca_hqta_points = pd.concat( - [not_ferry, is_ferry], axis=0 - ).reset_index(drop=True) - - return ca_hqta_points - -def get_agency_info(df: pd.DataFrame, date: str) -> pd.DataFrame: +def get_agency_crosswalk(analysis_date: str) -> pd.DataFrame: """ - HQTA analysis uses feed_key to link across schedule tables. - But, from trips table, we have schedule_gtfs_dataset_key, - and we can use that to join to our saved crosswalk. 
+ Import crosswalk for changing schedule_gtfs_dataset_key to + organization_name/source_record_id """ - crosswalk = helpers.import_schedule_gtfs_key_organization_crosswalk( - date, + agency_info = helpers.import_schedule_gtfs_key_organization_crosswalk( + analysis_date, columns = [ "schedule_gtfs_dataset_key", "organization_name", "organization_source_record_id", "base64_url"] - ).rename(columns = { + ).rename(columns = { "organization_name": "agency", "organization_source_record_id": "org_id" - })[["schedule_gtfs_dataset_key", - "agency", "org_id", "base64_url"]] - - return crosswalk + }) + return agency_info -def add_agency_names_hqta_details( + +def add_route_agency_info( gdf: gpd.GeoDataFrame, analysis_date: str -) -> gpd.GeoDataFrame: +) -> gpd.GeoDataFrame : """ - Add agency names by merging it in with our crosswalk - to get the primary feed_key and primary agency name. + Make sure route info is filled in for all stops. - Then use a function to add secondary feed_key and secondary agency name - and hqta_details column. - hqta_details makes it clearer for open data portal users why - some ID / agency name columns show the same info or are missing. + Add agency names by merging it in with our crosswalk + and populate primary and secondary operator information. """ - feeds_df = gdf.rename( - columns = {"feed_key_primary": "feed_key"})[ - ["feed_key", "schedule_gtfs_dataset_key"] - ].drop_duplicates() + stop_with_route_crosswalk = catalog.stops_info_crosswalk.read() - crosswalk = get_agency_info(feeds_df, analysis_date) + agency_info = get_agency_crosswalk(analysis_date) - agency_info = pd.merge( - feeds_df, - crosswalk, - on = "schedule_gtfs_dataset_key", - how = "inner" - ).drop(columns = "schedule_gtfs_dataset_key") - - # Merge in organization ids for feed_key_primary - # and feed_key_secondary + # Make sure all the stops have route_id gdf2 = pd.merge( gdf, + stop_with_route_crosswalk[ + ["schedule_gtfs_dataset_key", + "stop_id", "route_id"]].drop_duplicates().pipe(_utils.primary_rename), + on = ["schedule_gtfs_dataset_key_primary", "stop_id"], + how = "inner" + ) + + # Make sure gtfs_dataset_name and organization columns are added + gdf3 = pd.merge( + gdf2, agency_info.add_suffix("_primary"), - on = "feed_key_primary", + on = "schedule_gtfs_dataset_key_primary", how = "inner" ).merge( agency_info.add_suffix("_secondary"), - on = "feed_key_secondary", + on = "schedule_gtfs_dataset_key_secondary", how = "left" # left bc we don't want to drop rows that have secondary operator ) - - gdf2 = gdf2.assign( - hqta_details = gdf2.apply(hqta_details, axis=1), - ) - - # Additional clarification of hq_corridor_bus, - # only for hqta_stops, not hqta_polygons - gdf2["hqta_details"] = gdf2.apply( - lambda x: "corridor_frequent_stop" if ( - (x.hqta_type == "hq_corridor_bus") and - (x.peak_trips >= 4) - ) else "corridor_other_stop" if ( - (x.hqta_type == "hq_corridor_bus") and - (x.peak_trips < 4) - ) else x.hqta_details, axis = 1) - return gdf2 + return gdf3 def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: @@ -217,6 +142,19 @@ def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: Subset to columns, drop duplicates, sort for readability, always project into WGS84. 
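+    Also clip to CA, keeping ferry stops out of the clip so they
+    are not dropped.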
""" + # Clip to CA -- remove ferry or else we're losing it in the clip + not_ferry = gdf[ + gdf.hqta_type != "major_stop_ferry" + ].pipe(_utils.clip_to_ca) + + is_ferry = gdf[ + gdf.hqta_type == "major_stop_ferry" + ] + + gdf2 = pd.concat( + [not_ferry, is_ferry], axis=0 + ).reset_index(drop=True) + public_feeds = gtfs_utils_v2.filter_to_public_schedule_gtfs_dataset_keys() keep_cols = [ @@ -230,9 +168,10 @@ def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: "geometry" ] - gdf2 = ( - gdf[gdf.schedule_gtfs_dataset_key.isin(public_feeds)] - .reindex(columns = keep_cols) + gdf3 = ( + gdf2[ + (gdf2.schedule_gtfs_dataset_key_primary.isin(public_feeds)) + ].reindex(columns = keep_cols) .drop_duplicates( subset=["agency_primary", "hqta_type", "stop_id", "route_id"]) .sort_values(["agency_primary", "hqta_type", "stop_id"]) @@ -240,8 +179,8 @@ def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: .to_crs(geography_utils.WGS84) ) - return gdf2 - + return gdf3 + if __name__=="__main__": @@ -251,42 +190,15 @@ def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: level="INFO") start = datetime.datetime.now() - - rail_ferry_brt = get_rail_ferry_brt_extract().to_crs( - PROJECT_CRS) - major_stop_bus = catalog.major_stop_bus.read().to_crs(PROJECT_CRS) - stops_in_corridor = catalog.stops_in_hq_corr.read().to_crs(PROJECT_CRS) - max_arrivals_by_stop = pd.read_parquet( - f"{GCS_FILE_PATH}max_arrivals_by_stop.parquet", - columns = ["feed_key", "stop_id", "am_max_trips", "pm_max_trips"] - ).rename(columns = {"feed_key": "feed_key_primary"}) - - # Combine all the points data - hqta_points_combined = pd.concat([ - major_stop_bus, - stops_in_corridor, - # add name at once, rail/ferry/brt is only one with it... - # but we used it to double check downloads were correct - rail_ferry_brt, - ], axis=0) - - hqta_points_combined2 = merge_in_max_arrivals_by_stop( - hqta_points_combined, max_arrivals_by_stop) - - # Add in route_id - hqta_points_with_route_info = add_route_info(hqta_points_combined2) - - # Add agency names, hqta_details, project back to WGS84 - gdf = add_agency_names_hqta_details( - hqta_points_with_route_info, analysis_date - ) - - cols = [i for i in gdf.columns if "_primary" in i or "_secondary" in i] - gdf[cols].drop_duplicates().reset_index(drop=True).to_parquet( - f"{GCS_FILE_PATH}feed_key_org_crosswalk.parquet" - ) - - gdf = final_processing(gdf) + + # Combine all the points data and merge in max_arrivals + hqta_points_combined = combine_stops_by_hq_types(crs=PROJECT_CRS) + + # Add in route_id and agency info + hqta_points_with_info = add_route_agency_info( + hqta_points_combined, analysis_date) + + gdf = final_processing(hqta_points_with_info) # Export to GCS # Stash this date's into its own folder @@ -304,5 +216,8 @@ def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: ) end = datetime.datetime.now() - logger.info(f"D1_assemble_hqta_points {analysis_date} " - f"execution time: {end - start}") \ No newline at end of file + logger.info( + f"D1_assemble_hqta_points {analysis_date} " + f"execution time: {end - start}" + ) + \ No newline at end of file diff --git a/high_quality_transit_areas/D2_assemble_hqta_polygons.py b/high_quality_transit_areas/D2_assemble_hqta_polygons.py index 1270c630a..7f0328b7e 100644 --- a/high_quality_transit_areas/D2_assemble_hqta_polygons.py +++ b/high_quality_transit_areas/D2_assemble_hqta_polygons.py @@ -11,72 +11,85 @@ from loguru import logger -import C1_prep_pairwise_intersections as prep_clip -import D1_assemble_hqta_points 
as assemble_hqta_points
+import _utils
+from C1_prep_pairwise_intersections import prep_bus_corridors
+from D1_assemble_hqta_points import get_agency_crosswalk
 from calitp_data_analysis import utils, geography_utils
-from D1_assemble_hqta_points import EXPORT_PATH, add_route_info
-from update_vars import GCS_FILE_PATH, analysis_date, PROJECT_CRS
+from update_vars import (GCS_FILE_PATH, analysis_date, PROJECT_CRS, EXPORT_PATH,
+                         HALF_MILE_BUFFER_METERS, CORRIDOR_BUFFER_METERS
+                        )
 
 catalog = intake.open_catalog("*.yml")
 
-def get_dissolved_hq_corridor_bus(
-    gdf: gpd.GeoDataFrame, 
-    analysis_date: str
+def buffer_hq_corridor_bus(
+    analysis_date: str,
+    buffer_meters: int,
 ) -> gpd.GeoDataFrame:
     """
-    Take each segment, then dissolve by operator,
-    and use this dissolved polygon in hqta_polygons.
+    Buffer hq bus corridors.
     
-    Draw a buffer around this. 
+    Start with bus corridors, filter to those that are high quality,
+    and do a dissolve.
+    After the dissolve, buffer by an additional amount to
+    get the full 0.5 mile buffer.
     """
-    # Can keep route_id in dissolve, but route_id is not kept in final 
-    # export, so there would be multiple rows for multiple route_ids,
-    # and no way to distinguish between them    
-    keep_cols = ['feed_key', 'hq_transit_corr', 'route_id']
+    gdf = prep_bus_corridors(
+        is_hq_corr=True
+    ).to_crs(PROJECT_CRS)
+
+    keep_cols = ['schedule_gtfs_dataset_key', 'route_id']
     
     dissolved = (gdf[keep_cols + ["geometry"]]
                  .dissolve(by=keep_cols)
                  .reset_index()
                 )
     
-    # For hq_corridor_bus, we have feed_key again, and need to 
-    # add agency_name, or else this category will have missing name values
-    corridor_cols = [
-        "feed_key", "hqta_type", "route_id", "geometry"
-    ]
+    # Bus corridors are already buffered 50 meters,
+    # so will buffer 755 meters more to get the 0.5 mile radius
     corridors = dissolved.assign(
-        geometry = dissolved.geometry.buffer(755),
+        geometry = dissolved.geometry.buffer(buffer_meters),
         # overwrite hqta_type for this polygon
         hqta_type = "hq_corridor_bus",
-    )[corridor_cols].rename(
-        columns = {"feed_key": "feed_key_primary"}
-    )
-    
-    crosswalk = pd.read_parquet(
-        f"{GCS_FILE_PATH}feed_key_org_crosswalk.parquet"
-    )
-    primary_agency_cols = [i for i in crosswalk.columns if "_primary" in i]
+    ).pipe(_utils.primary_rename)
     
-    crosswalk = crosswalk[primary_agency_cols].drop_duplicates()
+    agency_info = get_agency_crosswalk(analysis_date)
 
+    # Make sure gtfs_dataset_name and organization columns are added
     corridors2 = pd.merge(
         corridors,
-        crosswalk,
-        on = "feed_key_primary",
+        agency_info.add_suffix("_primary"),
+        on = "schedule_gtfs_dataset_key_primary",
         how = "inner"
     )
     
     corridors2 = corridors2.assign(
-        hqta_details = corridors2.apply(
-            assemble_hqta_points.hqta_details, axis=1),
+        hqta_details = "stop_along_hq_bus_corridor_single_operator"
     )
-    
+
     return corridors2
 
 
-def filter_and_buffer(
-    hqta_points: gpd.GeoDataFrame,
-    hqta_segments: gpd.GeoDataFrame,
+def buffer_major_transit_stops(
+    buffer_meters: int
+) -> gpd.GeoDataFrame:
+    """
+    Buffer major transit stops.
+    Start with hqta points and filter out the hq_corridor_bus types.
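+
+    buffer_meters is in PROJECT_CRS units (meters for EPSG:3310);
+    passing HALF_MILE_BUFFER_METERS (805) gives the half mile radius.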
+    """
+    hqta_points = catalog.hqta_points.read().to_crs(PROJECT_CRS)
+
+    stops = hqta_points[hqta_points.hqta_type != "hq_corridor_bus"]
+
+    # General buffer distance: 1/2mi ~= 805 meters
+    # major stop points are unbuffered, so buffer the full amount here
+    stops = stops.assign(
+        geometry = stops.geometry.buffer(buffer_meters)
+    )
+
+    return stops
+
+
+def combine_corridors_and_stops(
     analysis_date: str
 ) -> gpd.GeoDataFrame:
     """
@@ -85,19 +98,18 @@
     Buffers are already drawn for corridors and stops, so 
     draw new buffers, and address each hqta_type separately.
     """
-    stops = hqta_points[hqta_points.hqta_type != "hq_corridor_bus"]
-    
-    corridors = get_dissolved_hq_corridor_bus(hqta_segments, analysis_date)
+    corridors = buffer_hq_corridor_bus(
+        analysis_date,
+        buffer_meters = CORRIDOR_BUFFER_METERS,
+    )
     
-    # General buffer distance: 1/2mi ~= 805 meters
-    # Bus corridors are already buffered 100 meters, so will buffer 705 meters
-    stops = stops.assign(
-        geometry = stops.geometry.buffer(705)
+    major_transit_stops = buffer_major_transit_stops(
+        buffer_meters = HALF_MILE_BUFFER_METERS
     )
     
     hqta_polygons = pd.concat([
         corridors,
-        stops
+        major_transit_stops
     ], axis=0).to_crs(geography_utils.WGS84)
     
     return hqta_polygons
@@ -106,7 +118,6 @@
 def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     """
     Drop extra columns, get sorting done.
-    Used to drop bad stops, but these all look ok.
     """
     keep_cols = [
         "agency_primary", "agency_secondary",
@@ -116,7 +127,6 @@ def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
         "geometry"
     ]
     
-    # Drop bad stops, subset columns
     gdf2 = (
         gdf[keep_cols]
         .drop_duplicates()
@@ -138,17 +148,8 @@ def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     
     start = datetime.datetime.now()
     
-    hqta_points = catalog.hqta_points.read().to_crs(PROJECT_CRS)
-    bus_hq_corr = prep_clip.prep_bus_corridors(
-        is_hq_corr=True
-    ).to_crs(PROJECT_CRS)
-    
-    # Filter and buffer for stops (805 m) and corridors (755 m)
-    # and add agency_names
-    gdf = filter_and_buffer(
-        hqta_points, bus_hq_corr, analysis_date
-    ).pipe(final_processing)
-    
+    gdf = combine_corridors_and_stops(analysis_date).pipe(final_processing)
+
     # Export to GCS
     utils.geoparquet_gcs_export(
         gdf,
@@ -164,5 +165,8 @@ def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     )
     
     end = datetime.datetime.now()
-    logger.info(f"D2_assemble_hqta_polygons {analysis_date} "
-                f"execution time: {end - start}")
\ No newline at end of file
+    logger.info(
+        f"D2_assemble_hqta_polygons {analysis_date} "
+        f"execution time: {end - start}"
+    )
+    
\ No newline at end of file
diff --git a/high_quality_transit_areas/catalog.yml b/high_quality_transit_areas/catalog.yml
index eb478de3d..ad92b3417 100644
--- a/high_quality_transit_areas/catalog.yml
+++ b/high_quality_transit_areas/catalog.yml
@@ -15,9 +15,15 @@ sources:
       urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/ca_boundary.parquet
   ## INTERMEDIATE DATA
   # Source: A1_rail_ferry_brt_stops.py
+  stops_info_crosswalk:
+    driver: geoparquet
+    description: Assembled stop gdf with route info from trips table.
+    args:
+      urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/stops_to_route.parquet
+  # Source: A1_rail_ferry_brt_stops.py
   rail_brt_ferry_initial:
     driver: geoparquet
-    description: Rail / BRT / Ferry points created in A2_combine_stops.py
+    description: All the major transit stops (Rail / BRT / Ferry).
args: urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/rail_brt_ferry.parquet # Source: B1_create_hqta_segments.py @@ -35,7 +41,7 @@ sources: # Source: C1_prep_pairwise_intersections.py pairwise_intersections: driver: parquet - description: Use dask_geopandas.sjoin to find which hqta segments do intersect at some point. Created in C1_prep_pairwise_intersections.py. + description: Use spatial join to find which hqta segments do intersect at some point. Created in C1_prep_pairwise_intersections.py. args: urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/pairwise.parquet # Source: C1_prep_pairwise_intersections.py diff --git a/high_quality_transit_areas/logs/hqta_processing.log b/high_quality_transit_areas/logs/hqta_processing.log index 66aa62c21..3811550e5 100644 --- a/high_quality_transit_areas/logs/hqta_processing.log +++ b/high_quality_transit_areas/logs/hqta_processing.log @@ -142,3 +142,18 @@ 2024-09-19 09:32:36.082 | INFO | __main__::163 - C3_create_bus_hqta_types 2024-09-18 execution time: 0:00:37.486499 2024-09-19 09:33:22.863 | INFO | __main__::307 - D1_assemble_hqta_points 2024-09-18 execution time: 0:00:27.668799 2024-09-19 09:36:35.489 | INFO | __main__::167 - D2_assemble_hqta_polygons 2024-09-18 execution time: 0:00:26.678607 +2024-09-19 14:07:11.844 | INFO | __main__::269 - A1_rail_ferry_brt_stops 2024-09-18 execution time: 0:00:16.390254 +2024-09-19 14:10:48.407 | INFO | __main__::276 - A1_rail_ferry_brt_stops 2024-09-18 execution time: 0:00:17.570946 +2024-09-19 14:51:15.926 | INFO | __main__::248 - B1_create_hqta_segments execution time: 0:04:11.305371 +2024-09-19 14:55:50.079 | INFO | __main__::268 - B2_sjoin_stops_to_segments 2024-09-18 execution time: 0:00:50.195775 +2024-09-19 14:56:18.048 | INFO | __main__::140 - C1_prep_pairwise_intersections 2024-09-18 execution time: 0:00:09.222281 +2024-09-19 14:57:06.861 | INFO | __main__::124 - C2_find_intersections 2024-09-18 execution time: 0:00:32.170864 +2024-09-19 15:04:46.269 | INFO | __main__::176 - C3_create_bus_hqta_types 2024-09-18 execution time: 0:00:28.423966 +2024-09-25 10:10:35.837 | INFO | __main__::275 - A1_rail_ferry_brt_stops 2024-09-18 execution time: 0:00:16.979217 +2024-09-25 10:18:42.737 | INFO | __main__::248 - B1_create_hqta_segments execution time: 0:07:48.832296 +2024-09-25 10:20:17.796 | INFO | __main__::267 - B2_sjoin_stops_to_segments 2024-09-18 execution time: 0:01:12.987290 +2024-09-25 10:20:52.130 | INFO | __main__::140 - C1_prep_pairwise_intersections 2024-09-18 execution time: 0:00:10.962771 +2024-09-25 10:22:04.117 | INFO | __main__::124 - C2_find_intersections 2024-09-18 execution time: 0:00:49.634882 +2024-09-25 10:25:48.480 | INFO | __main__::175 - C3_create_bus_hqta_types 2024-09-18 execution time: 0:00:26.100650 +2024-09-25 10:27:51.226 | INFO | __main__::219 - D1_assemble_hqta_points 2024-09-18 execution time: 0:00:18.606053 +2024-09-25 10:30:59.443 | INFO | __main__::168 - D2_assemble_hqta_polygons 2024-09-18 execution time: 0:00:30.215506 diff --git a/high_quality_transit_areas/update_vars.py b/high_quality_transit_areas/update_vars.py index dffb41557..d35684264 100644 --- a/high_quality_transit_areas/update_vars.py +++ b/high_quality_transit_areas/update_vars.py @@ -4,8 +4,9 @@ GCS_FILE_PATH = ("gs://calitp-analytics-data/data-analyses/" "high_quality_transit_areas/") -TEMP_GCS = f"{GCS_FILE_PATH}temp/" PROJECT_CRS = "EPSG:3310" HQTA_SEGMENT_LENGTH = 1_250 # meters -BUFFER_METERS = 50 +SEGMENT_BUFFER_METERS = 50 # buffer around 
segment to sjoin to stops +HALF_MILE_BUFFER_METERS = 805 # half mile ~ 805 meters +CORRIDOR_BUFFER_METERS = HALF_MILE_BUFFER_METERS - SEGMENT_BUFFER_METERS # 755 meters EXPORT_PATH = f"{GCS_FILE_PATH}export/{analysis_date}/" \ No newline at end of file From 1ff8a34ef7ceb49342485b911b2cd1a06e0dbb78 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 25 Sep 2024 17:36:27 +0000 Subject: [PATCH 08/11] rename files, use makefile to set order --- high_quality_transit_areas/Makefile | 16 ++++---- high_quality_transit_areas/README.md | 20 +++++----- ...hqta_points.py => assemble_hqta_points.py} | 0 ..._polygons.py => assemble_hqta_polygons.py} | 4 +- high_quality_transit_areas/catalog.yml | 38 +++++++++---------- ...hqta_types.py => create_bus_hqta_types.py} | 5 +-- ...ta_segments.py => create_hqta_segments.py} | 0 ..._intersections.py => get_intersections.py} | 0 ...ions.py => prep_pairwise_intersections.py} | 0 ...y_brt_stops.py => rail_ferry_brt_stops.py} | 0 ...segments.py => sjoin_stops_to_segments.py} | 0 11 files changed, 40 insertions(+), 43 deletions(-) rename high_quality_transit_areas/{D1_assemble_hqta_points.py => assemble_hqta_points.py} (100%) rename high_quality_transit_areas/{D2_assemble_hqta_polygons.py => assemble_hqta_polygons.py} (97%) rename high_quality_transit_areas/{C3_create_bus_hqta_types.py => create_bus_hqta_types.py} (97%) rename high_quality_transit_areas/{B1_create_hqta_segments.py => create_hqta_segments.py} (100%) rename high_quality_transit_areas/{C2_get_intersections.py => get_intersections.py} (100%) rename high_quality_transit_areas/{C1_prep_pairwise_intersections.py => prep_pairwise_intersections.py} (100%) rename high_quality_transit_areas/{A1_rail_ferry_brt_stops.py => rail_ferry_brt_stops.py} (100%) rename high_quality_transit_areas/{B2_sjoin_stops_to_segments.py => sjoin_stops_to_segments.py} (100%) diff --git a/high_quality_transit_areas/Makefile b/high_quality_transit_areas/Makefile index 6130489cc..3f9860c33 100644 --- a/high_quality_transit_areas/Makefile +++ b/high_quality_transit_areas/Makefile @@ -1,12 +1,12 @@ hqta_data: - python A1_rail_ferry_brt_stops.py - python B1_create_hqta_segments.py - python B2_sjoin_stops_to_segments.py - python C1_prep_pairwise_intersections.py - python C2_get_intersections.py - python C3_create_bus_hqta_types.py - python D1_assemble_hqta_points.py - python D2_assemble_hqta_polygons.py + python rail_ferry_brt_stops.py + python create_hqta_segments.py + python sjoin_stops_to_segments.py + python prep_pairwise_intersections.py + python get_intersections.py + python create_bus_hqta_types.py + python assemble_hqta_points.py + python assemble_hqta_polygons.py # Only need this is operator input changes # For now, Muni sent over a date-versioned list of stops diff --git a/high_quality_transit_areas/README.md b/high_quality_transit_areas/README.md index 44066ee52..45fb2f624 100644 --- a/high_quality_transit_areas/README.md +++ b/high_quality_transit_areas/README.md @@ -80,22 +80,20 @@ If not, within the `gtfs_funnel` directory, run `make download_gtfs_data` in the In terminal: `make hqta_data` to run through entire workflow. -1. [Compile rail, ferry, brt data](./A1_rail_ferry_brt_stops.py) - * Sanity check: [check 1: downloads](./check1_downloads.ipynb) -1. [Draw bus corridors, from routes to HQTA segments](./B1_create_hqta_segments.py) +1. [Compile rail, ferry, brt data](./rail_ferry_brt_stops.py) +1. 
[Draw bus corridors, from routes to HQTA segments](./create_hqta_segments.py)
    * Across all operators, find the longest shapes in each direction. Use a symmetric difference to grab the components that make up the route network.
    * Cut route into HQTA segments. Every segment is 1,250 m.
    * Add in route direction.
1. [Combine operator HQTA areas across operators](./sjoin_stops_to_segments.py)
    * Attach number of stop arrivals that occur in the AM and PM and find the max
    * Do spatial join of stops to HQTA segments. Where multiple stops are present, keep the stop with the highest number of trips.
1. [Use pairwise table to store which segments intersect](./prep_pairwise_intersections.py)
    * Find which routes actually do intersect, and store that in a pairwise table.
1. [Find where corridors intersect](./get_intersections.py)
1. [Create datasets for each of the hqta types](./create_bus_hqta_types.py)
    * `major_stop_bus`: the bus stop within the above intersection does not necessarily have the highest trip count
    * `hq_corridor_bus`: stops along the HQ transit corridor (may not be highest trip count)
-   * Sanity check: [check 2: hq corridors](./check2_hq_corridors.ipynb)
-1. [Compile and export HQTA areas as points](./D1_assemble_hqta_points.py)
-   * Sanity check: [check 3: hqta points](./check3_hqta_points.ipynb)
-1. [Compile and export HQTA areas as polygons](./D2_assemble_hqta_polygons.py)
+1. [Compile and export HQTA areas as points](./assemble_hqta_points.py)
+   * Sanity check: [check 3: hqta points](./check_exports.ipynb)
+1. 
[Compile and export HQTA areas as polygons](./assemble_hqta_polygons.py) diff --git a/high_quality_transit_areas/D1_assemble_hqta_points.py b/high_quality_transit_areas/assemble_hqta_points.py similarity index 100% rename from high_quality_transit_areas/D1_assemble_hqta_points.py rename to high_quality_transit_areas/assemble_hqta_points.py diff --git a/high_quality_transit_areas/D2_assemble_hqta_polygons.py b/high_quality_transit_areas/assemble_hqta_polygons.py similarity index 97% rename from high_quality_transit_areas/D2_assemble_hqta_polygons.py rename to high_quality_transit_areas/assemble_hqta_polygons.py index 7f0328b7e..7558c68ef 100644 --- a/high_quality_transit_areas/D2_assemble_hqta_polygons.py +++ b/high_quality_transit_areas/assemble_hqta_polygons.py @@ -12,8 +12,8 @@ from loguru import logger import _utils -from C1_prep_pairwise_intersections import prep_bus_corridors -from D1_assemble_hqta_points import get_agency_crosswalk +from prep_pairwise_intersections import prep_bus_corridors +from assemble_hqta_points import get_agency_crosswalk from calitp_data_analysis import utils, geography_utils from update_vars import (GCS_FILE_PATH, analysis_date, PROJECT_CRS, EXPORT_PATH, HALF_MILE_BUFFER_METERS, CORRIDOR_BUFFER_METERS diff --git a/high_quality_transit_areas/catalog.yml b/high_quality_transit_areas/catalog.yml index ad92b3417..de57d1fff 100644 --- a/high_quality_transit_areas/catalog.yml +++ b/high_quality_transit_areas/catalog.yml @@ -14,69 +14,69 @@ sources: args: urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/ca_boundary.parquet ## INTERMEDIATE DATA - # Source: A1_rail_ferry_brt_stops.py + # Source: rail_ferry_brt_stops.py stops_info_crosswalk: driver: geoparquet description: Assembled stop gdf with route info from trips table. args: urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/stops_to_route.parquet - # Source: A1_rail_ferry_brt_stops.py - rail_brt_ferry_initial: + # Source: rail_ferry_brt_stops.py + rail_brt_ferry_stops: driver: geoparquet description: All the major transit stops (Rail / BRT / Ferry). args: urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/rail_brt_ferry.parquet - # Source: B1_create_hqta_segments.py + # Source: create_hqta_segments.py hqta_segments: driver: geoparquet - description: Cut HQTA segments across all operators. Created in B1_create_hqta_segments.py. + description: Cut HQTA segments across all operators. args: urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/hqta_segments.parquet - # Source: B2_sjoin_stops_to_segments.py + # Source: sjoin_stops_to_segments.py all_bus: driver: geoparquet - description: Combined hqta corridors across all operators. Created in B2_sjoin_stops_to_segments.py. + description: Combined hqta corridors across all operators. args: urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/all_bus.parquet - # Source: C1_prep_pairwise_intersections.py + # Source: prep_pairwise_intersections.py pairwise_intersections: driver: parquet - description: Use spatial join to find which hqta segments do intersect at some point. Created in C1_prep_pairwise_intersections.py. + description: Use spatial join to find which hqta segments do intersect at some point. 
args:
      urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/pairwise.parquet
-  # Source: C1_prep_pairwise_intersections.py
+  # Source: prep_pairwise_intersections.py
   subset_corridors:
     driver: geoparquet
-    description: Narrow down `all_bus` to hqta segments that are found in `pairwise_intersections`. Created in C1_prep_pairwise_intersections.py.
+    description: Narrow down `all_bus` to hqta segments that are found in `pairwise_intersections`.
     args:
       urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/subset_corridors.parquet
-  # Source: C2_get_intersections.py
+  # Source: get_intersections.py
   all_intersections:
     driver: geoparquet
     description: Find where 2 bus corridors intersect by doing an intersection between the hqta segments.
     args:
       urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/all_intersections.parquet
-  # Source: C3_create_bus_hqta_types.py
+  # Source: create_bus_hqta_types.py
   major_stop_bus:
     driver: geoparquet
-    description: Bus stops that are within the bus intersections. Created in C3_create_bus_hqta_types.py.
+    description: Bus stops that are within the bus intersections.
     args:
       urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/major_stop_bus.parquet
   stops_in_hq_corr:
     driver: geoparquet
-    description: Bus stops that are within the HQ corridors, even if they stops themselves do not have a lot of trips pass through. Created in C3_create_bus_hqta_types.py.
+    description: Bus stops that are within the HQ corridors, even if the stops themselves do not have many trips passing through.
     args:
       urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/stops_in_hq_corr.parquet
   ## FINAL DATA
-  # Source: D1_assemble_hqta_points
+  # Source: assemble_hqta_points
   hqta_points:
     driver: geoparquet
-    description: Combined point data for all HQTA types. Created in D1_assemble_hqta_points.py.
+    description: Combined point data for all HQTA types.
     args:
       urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/hqta_points.parquet
-  # Source: D2_assemble_hqta_polygons
+  # Source: assemble_hqta_polygons
   hqta_areas:
     driver: geoparquet
-    description: Combined polygon data for all HQTA types. Created in D2_assemble_hqta_polygons.py.
+    description: Combined polygon data for all HQTA types.
     args:
       urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/hqta_areas.parquet
\ No newline at end of file
diff --git a/high_quality_transit_areas/C3_create_bus_hqta_types.py b/high_quality_transit_areas/create_bus_hqta_types.py
similarity index 97%
rename from high_quality_transit_areas/C3_create_bus_hqta_types.py
rename to high_quality_transit_areas/create_bus_hqta_types.py
index 1f0574e41..1264148b7 100644
--- a/high_quality_transit_areas/C3_create_bus_hqta_types.py
+++ b/high_quality_transit_areas/create_bus_hqta_types.py
@@ -17,8 +17,7 @@
 from loguru import logger
 
 import _utils
-import C1_prep_pairwise_intersections as prep_clip
-
+from prep_pairwise_intersections import prep_bus_corridors
 from calitp_data_analysis import utils
 from segment_speed_utils import helpers
 from update_vars import (GCS_FILE_PATH, analysis_date,
@@ -93,7 +92,7 @@ def create_stops_along_corridors(all_stops: gpd.GeoDataFram
     They may also be stops that don't meet the HQ corridor threshold,
     but are stops that physically reside in the corridor.
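+    These corridor stops are tagged hqta_type == hq_corridor_bus.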
""" - bus_corridors = (prep_clip.prep_bus_corridors(is_hq_corr = True) + bus_corridors = (prep_bus_corridors(is_hq_corr = True) [["hqta_segment_id", "geometry"]] ) diff --git a/high_quality_transit_areas/B1_create_hqta_segments.py b/high_quality_transit_areas/create_hqta_segments.py similarity index 100% rename from high_quality_transit_areas/B1_create_hqta_segments.py rename to high_quality_transit_areas/create_hqta_segments.py diff --git a/high_quality_transit_areas/C2_get_intersections.py b/high_quality_transit_areas/get_intersections.py similarity index 100% rename from high_quality_transit_areas/C2_get_intersections.py rename to high_quality_transit_areas/get_intersections.py diff --git a/high_quality_transit_areas/C1_prep_pairwise_intersections.py b/high_quality_transit_areas/prep_pairwise_intersections.py similarity index 100% rename from high_quality_transit_areas/C1_prep_pairwise_intersections.py rename to high_quality_transit_areas/prep_pairwise_intersections.py diff --git a/high_quality_transit_areas/A1_rail_ferry_brt_stops.py b/high_quality_transit_areas/rail_ferry_brt_stops.py similarity index 100% rename from high_quality_transit_areas/A1_rail_ferry_brt_stops.py rename to high_quality_transit_areas/rail_ferry_brt_stops.py diff --git a/high_quality_transit_areas/B2_sjoin_stops_to_segments.py b/high_quality_transit_areas/sjoin_stops_to_segments.py similarity index 100% rename from high_quality_transit_areas/B2_sjoin_stops_to_segments.py rename to high_quality_transit_areas/sjoin_stops_to_segments.py From dae14d09c2c66d9e084aa986f79a8972848a8484 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 25 Sep 2024 17:37:10 +0000 Subject: [PATCH 09/11] clean up logs --- .../logs/hqta_processing.log | 120 ------------------ 1 file changed, 120 deletions(-) diff --git a/high_quality_transit_areas/logs/hqta_processing.log b/high_quality_transit_areas/logs/hqta_processing.log index 3811550e5..9db268abf 100644 --- a/high_quality_transit_areas/logs/hqta_processing.log +++ b/high_quality_transit_areas/logs/hqta_processing.log @@ -1,123 +1,3 @@ -2024-02-15 10:39:37.404 | INFO | __main__::62 - Analysis date: 2024-02-14 -2024-02-15 10:39:53.615 | INFO | __main__::70 - grabbed rail: 0:00:16.209130 -2024-02-15 10:40:08.900 | INFO | __main__::79 - grabbed brt: 0:00:15.285392 -2024-02-15 10:40:23.528 | INFO | __main__::86 - grabbed ferry: 0:00:14.627469 -2024-02-15 10:40:27.034 | INFO | __main__::102 - concatenated datasets -2024-02-15 10:40:27.290 | INFO | __main__::113 - execution time: 0:00:49.884562 -2024-02-15 10:41:59.689 | INFO | __main__::281 - Analysis date: 2024-02-14 -2024-02-15 10:42:06.502 | INFO | __main__::298 - merge routes to trips: 0:00:06.811228 -2024-02-15 10:48:36.857 | INFO | __main__::319 - cut segments: 0:06:30.354741 -2024-02-15 10:48:36.858 | INFO | __main__::322 - total execution time: 0:06:37.166630 -2024-02-15 10:48:55.584 | INFO | __main__::218 - Analysis date: 2024-02-14 -2024-02-15 10:49:39.341 | INFO | __main__::258 - Execution time: 0:00:43.7553602024-02-15 11:12:10.529 | INFO | __main__::123 - operators_for_hqta Analysis date: 2024-02-14 -2024-02-15 11:12:11.884 | INFO | __main__::132 - get list of cached ITP IDs: 0:00:01.322261 -2024-02-15 11:12:13.731 | INFO | __main__::148 - check files for completeness, save as json: 0:00:01.846775 -2024-02-15 11:12:13.731 | INFO | __main__::151 - operators_for_hqta execution time: 0:00:03.169393 -2024-02-15 11:12:29.212 | INFO | __main__::62 - A2_combine_stops Analysis Date: 2024-02-14 -2024-02-15 11:12:42.921 | INFO | 
__main__::70 - grabbed rail: 0:00:13.707923 -2024-02-15 11:12:56.621 | INFO | __main__::79 - grabbed brt: 0:00:13.700660 -2024-02-15 11:13:08.412 | INFO | __main__::86 - grabbed ferry: 0:00:11.790357 -2024-02-15 11:13:11.728 | INFO | __main__::110 - A2_combine_stops execution time: 0:00:42.515868 -2024-02-15 11:13:25.690 | INFO | __main__::281 - B1_create_hqta_segments Analysis date: 2024-02-14 -2024-02-15 11:13:31.067 | INFO | __main__::298 - merge routes to trips: 0:00:05.376384 -2024-02-15 11:17:26.810 | INFO | __main__::319 - cut segments: 0:03:55.742468 -2024-02-15 11:17:26.810 | INFO | __main__::322 - B1_create_hqta_segments execution time: 0:04:01.119368 -2024-02-15 11:17:40.815 | INFO | __main__::218 - B2_sjoin_stops_to_segments Analysis date: 2024-02-14 -2024-02-15 11:18:13.209 | INFO | __main__::258 - B2_sjoin_stops_to_segments execution time: 0:00:32.393811 -2024-02-15 11:18:28.611 | INFO | __main__::90 - C1_prep_pairwise_intersections Analysis date: 2024-02-14 -2024-02-15 11:18:35.802 | INFO | __main__::113 - get pairwise table: 0:00:07.190876 -2024-02-15 11:18:36.183 | INFO | __main__::136 - compute for pairwise/subset_corridors: 0:00:00.381023 -2024-02-15 11:18:38.070 | INFO | __main__::148 - C1_prep_pairwise_intersections execution time: 0:00:09.459173 -2024-02-15 11:18:51.793 | INFO | __main__::109 - C2_find_intersections Analysis date: 2024-02-14 -2024-02-15 11:18:58.127 | INFO | __main__::119 - attach geometry to pairwise table: 0:00:06.332460 -2024-02-15 11:19:14.113 | INFO | __main__::124 - find intersections: 0:00:15.986460 -2024-02-15 11:19:17.140 | INFO | __main__::133 - C2_find_intersections execution time: 0:00:25.345916 -2024-02-15 11:19:30.961 | INFO | __main__::127 - C3_create_bus_hqta_types Analysis date: 2024-02-14 -2024-02-15 11:19:46.819 | INFO | __main__::141 - grab all stops -2024-02-15 11:19:49.936 | INFO | __main__::145 - create major stop bus -2024-02-15 11:19:53.686 | INFO | __main__::149 - create hq corridor bus -2024-02-15 11:19:54.723 | INFO | __main__::165 - C3_create_bus_hqta_types execution time: 0:00:23.759924 -2024-02-15 11:20:10.567 | INFO | __main__::209 - D1_assemble_hqta_points Analysis date: 2024-02-14 -2024-02-15 11:20:15.564 | INFO | __main__::234 - combined points: 0:00:04.997074 -2024-02-15 11:20:28.853 | INFO | __main__::240 - add route info: 0:00:13.288940 -2024-02-15 11:20:38.686 | INFO | __main__::248 - add agency names: 0:00:09.832929 -2024-02-15 11:20:39.357 | INFO | __main__::258 - export as geoparquet in date folder -2024-02-15 11:20:39.906 | INFO | __main__::267 - export as geoparquet -2024-02-15 11:20:39.907 | INFO | __main__::270 - D1_assemble_hqta_points execution time: 0:00:29.339672 -2024-02-15 11:20:53.679 | INFO | __main__::135 - D2_assemble_hqta_polygons Analysis date: 2024-02-14 -2024-02-15 11:21:20.489 | INFO | __main__::146 - filter and buffer: 0:00:26.810481 -2024-02-15 11:21:22.772 | INFO | __main__::159 - export as geoparquet in date folder -2024-02-15 11:21:24.051 | INFO | __main__::168 - export as geoparquet -2024-02-15 11:21:24.052 | INFO | __main__::171 - D2_assemble_hqta_polygons execution time: 0:00:30.373503 -2024-02-23 12:16:18.905 | INFO | __main__::334 - A1_rail_ferry_brt_stops 2024-02-14 execution time: 0:00:55.356475 -2024-02-23 13:02:56.942 | INFO | __main__::243 - B1_create_hqta_segments execution time: 0:02:45.137511 -2024-02-23 13:06:07.547 | INFO | __main__::256 - B2_sjoin_stops_to_segments 2024-02-14 execution time: 0:00:43.474327 -2024-02-23 13:18:24.960 | INFO | __main__::143 - 
C1_prep_pairwise_intersections 2024-02-14 execution time: 0:00:09.433171 -2024-02-23 13:20:52.094 | INFO | __main__::125 - C2_find_intersections 2024-02-14 execution time: 0:00:26.055988 -2024-02-23 13:38:04.076 | INFO | __main__::341 - A1_rail_ferry_brt_stops 2024-02-14 execution time: 0:00:47.907172 -2024-02-23 13:41:03.390 | INFO | __main__::243 - B1_create_hqta_segments execution time: 0:02:43.653086 -2024-02-23 13:41:58.511 | INFO | __main__::256 - B2_sjoin_stops_to_segments 2024-02-14 execution time: 0:00:38.637850 -2024-02-23 13:42:21.715 | INFO | __main__::143 - C1_prep_pairwise_intersections 2024-02-14 execution time: 0:00:06.457581 -2024-02-23 13:42:59.131 | INFO | __main__::125 - C2_find_intersections 2024-02-14 execution time: 0:00:21.831450 -2024-02-23 13:48:09.216 | INFO | __main__::160 - C3_create_bus_hqta_types 2024-02-14 execution time: 0:00:21.771559 -2024-02-23 15:04:25.099 | INFO | __main__::295 - D1_assemble_hqta_points 2024-02-14 execution time: 0:00:24.251981 -2024-02-23 15:06:13.474 | INFO | __main__::167 - D2_assemble_hqta_polygons 2024-02-14 execution time: 0:00:20.572008 -2024-03-14 10:35:35.916 | INFO | __main__::340 - A1_rail_ferry_brt_stops 2023-03-13 execution time: 0:00:56.468970 -2024-03-14 10:49:33.915 | INFO | __main__::243 - B1_create_hqta_segments execution time: 0:13:35.798908 -2024-03-14 10:50:41.957 | INFO | __main__::256 - B2_sjoin_stops_to_segments 2023-03-13 execution time: 0:00:46.505567 -2024-03-14 10:51:12.944 | INFO | __main__::142 - C1_prep_pairwise_intersections 2023-03-13 execution time: 0:00:07.495188 -2024-03-14 10:52:10.238 | INFO | __main__::125 - C2_find_intersections 2023-03-13 execution time: 0:00:34.444930 -2024-03-14 10:53:01.682 | INFO | __main__::163 - C3_create_bus_hqta_types 2023-03-13 execution time: 0:00:29.090421 -2024-03-14 11:27:07.016 | INFO | __main__::259 - B1_create_hqta_segments execution time: 0:01:24.890920 -2024-03-14 11:29:20.496 | INFO | __main__::295 - D1_assemble_hqta_points 2023-03-13 execution time: 0:00:22.179824 -2024-03-14 11:30:06.328 | INFO | __main__::167 - D2_assemble_hqta_polygons 2023-03-13 execution time: 0:00:22.226070 -2024-03-14 11:48:11.160 | INFO | __main__::340 - A1_rail_ferry_brt_stops 2024-03-13 execution time: 0:00:57.213630 -2024-03-14 11:53:27.946 | INFO | __main__::259 - B1_create_hqta_segments execution time: 0:04:58.538786 -2024-03-14 11:54:43.754 | INFO | __main__::256 - B2_sjoin_stops_to_segments 2024-03-13 execution time: 0:00:50.087412 -2024-03-14 11:55:10.649 | INFO | __main__::142 - C1_prep_pairwise_intersections 2024-03-13 execution time: 0:00:07.089710 -2024-03-14 11:56:06.191 | INFO | __main__::125 - C2_find_intersections 2024-03-13 execution time: 0:00:35.945019 -2024-03-14 11:56:55.334 | INFO | __main__::163 - C3_create_bus_hqta_types 2024-03-13 execution time: 0:00:27.390021 -2024-03-14 12:12:21.763 | INFO | __main__::295 - D1_assemble_hqta_points 2024-03-13 execution time: 0:00:26.480160 -2024-03-14 12:13:12.687 | INFO | __main__::167 - D2_assemble_hqta_polygons 2024-03-13 execution time: 0:00:29.033860 -2024-03-21 11:54:40.930 | INFO | __main__::354 - A1_rail_ferry_brt_stops 2024-03-13 execution time: 0:00:51.987419 -2024-03-21 12:01:28.365 | INFO | __main__::249 - B1_create_hqta_segments execution time: 0:03:02.428114 -2024-03-21 12:02:23.099 | INFO | __main__::256 - B2_sjoin_stops_to_segments 2024-03-13 execution time: 0:00:35.845848 -2024-03-21 12:02:46.911 | INFO | __main__::142 - C1_prep_pairwise_intersections 2024-03-13 execution time: 0:00:05.864652 -2024-03-21 
12:03:24.770 | INFO | __main__::125 - C2_find_intersections 2024-03-13 execution time: 0:00:21.158652 -2024-03-21 12:04:01.449 | INFO | __main__::163 - C3_create_bus_hqta_types 2024-03-13 execution time: 0:00:19.553787 -2024-03-21 12:04:42.807 | INFO | __main__::295 - D1_assemble_hqta_points 2024-03-13 execution time: 0:00:22.988739 -2024-03-21 12:05:20.102 | INFO | __main__::167 - D2_assemble_hqta_polygons 2024-03-13 execution time: 0:00:19.166756 -2024-04-18 12:02:44.870 | INFO | __main__::354 - A1_rail_ferry_brt_stops 2024-04-17 execution time: 0:00:59.115933 -2024-04-18 12:09:06.425 | INFO | __main__::256 - B2_sjoin_stops_to_segments 2024-04-17 execution time: 0:00:50.678918 -2024-04-18 12:09:36.340 | INFO | __main__::142 - C1_prep_pairwise_intersections 2024-04-17 execution time: 0:00:07.719892 -2024-04-18 12:10:31.226 | INFO | __main__::125 - C2_find_intersections 2024-04-17 execution time: 0:00:33.802270 -2024-04-18 12:11:31.609 | INFO | __main__::163 - C3_create_bus_hqta_types 2024-04-17 execution time: 0:00:37.330690 -2024-04-18 12:12:28.853 | INFO | __main__::296 - D1_assemble_hqta_points 2024-04-17 execution time: 0:00:31.955298 -2024-04-18 12:13:36.294 | INFO | __main__::167 - D2_assemble_hqta_polygons 2024-04-17 execution time: 0:00:40.596021 -2024-06-07 15:55:59.608 | INFO | __main__::354 - A1_rail_ferry_brt_stops 2024-05-26 execution time: 0:00:43.845652 -2024-06-07 16:00:03.975 | INFO | __main__::249 - B1_create_hqta_segments execution time: 0:03:43.783595 -2024-06-07 16:01:02.185 | INFO | __main__::256 - B2_sjoin_stops_to_segments 2024-05-26 execution time: 0:00:34.801918 -2024-06-07 16:01:29.932 | INFO | __main__::142 - C1_prep_pairwise_intersections 2024-05-26 execution time: 0:00:05.850319 -2024-06-07 16:02:05.231 | INFO | __main__::125 - C2_find_intersections 2024-05-26 execution time: 0:00:14.305249 -2024-06-07 16:02:42.337 | INFO | __main__::163 - C3_create_bus_hqta_types 2024-05-26 execution time: 0:00:16.144903 -2024-06-07 16:03:25.052 | INFO | __main__::296 - D1_assemble_hqta_points 2024-05-26 execution time: 0:00:20.105690 -2024-06-07 16:04:04.859 | INFO | __main__::167 - D2_assemble_hqta_polygons 2024-05-26 execution time: 0:00:16.899794 -2024-06-13 10:52:06.307 | INFO | __main__::354 - A1_rail_ferry_brt_stops 2024-05-22 execution time: 0:02:22.756503 -2024-06-13 12:58:01.749 | INFO | __main__::249 - B1_create_hqta_segments execution time: 0:13:52.098231 -2024-06-13 13:00:11.575 | INFO | __main__::256 - B2_sjoin_stops_to_segments 2024-05-22 execution time: 0:01:32.770484 -2024-06-13 13:00:50.204 | INFO | __main__::142 - C1_prep_pairwise_intersections 2024-05-22 execution time: 0:00:10.587615 -2024-06-13 13:01:58.938 | INFO | __main__::125 - C2_find_intersections 2024-05-22 execution time: 0:00:42.017435 -2024-06-13 13:03:04.167 | INFO | __main__::163 - C3_create_bus_hqta_types 2024-05-22 execution time: 0:00:38.066749 -2024-06-13 13:04:13.581 | INFO | __main__::296 - D1_assemble_hqta_points 2024-05-22 execution time: 0:00:33.857546 -2024-06-13 13:05:21.917 | INFO | __main__::167 - D2_assemble_hqta_polygons 2024-05-22 execution time: 0:00:38.362120 -2024-06-13 13:08:03.561 | INFO | __main__::354 - A1_rail_ferry_brt_stops 2024-06-12 execution time: 0:01:04.260629 -2024-06-13 13:17:59.981 | INFO | __main__::249 - B1_create_hqta_segments execution time: 0:09:30.012600 -2024-06-13 13:19:38.445 | INFO | __main__::256 - B2_sjoin_stops_to_segments 2024-06-12 execution time: 0:01:13.144507 -2024-06-13 13:20:16.756 | INFO | __main__::142 - C1_prep_pairwise_intersections 
2024-06-12 execution time: 0:00:09.378312 -2024-06-13 13:21:54.122 | INFO | __main__::125 - C2_find_intersections 2024-06-12 execution time: 0:01:04.703513 -2024-06-13 13:23:49.518 | INFO | __main__::163 - C3_create_bus_hqta_types 2024-06-12 execution time: 0:01:10.061193 -2024-06-13 13:25:40.133 | INFO | __main__::296 - D1_assemble_hqta_points 2024-06-12 execution time: 0:00:58.173453 -2024-06-13 13:27:47.666 | INFO | __main__::167 - D2_assemble_hqta_polygons 2024-06-12 execution time: 0:01:16.776741 2024-07-18 12:57:28.027 | INFO | __main__::354 - A1_rail_ferry_brt_stops 2024-07-17 execution time: 0:01:01.056099 2024-07-18 13:01:59.287 | INFO | __main__::249 - B1_create_hqta_segments execution time: 0:04:10.669481 2024-07-18 13:03:04.077 | INFO | __main__::256 - B2_sjoin_stops_to_segments 2024-07-17 execution time: 0:00:44.167777 From 8498292164885de6d4ccd695f9d47ce6f0f267cf Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 25 Sep 2024 17:42:04 +0000 Subject: [PATCH 10/11] (remove): combine 3 hqta notebook checks into 1 --- .../check1_downloads.ipynb | 107 ------------- .../check2_hq_corridors.ipynb | 150 ------------------ ..._hqta_points.ipynb => check_exports.ipynb} | 58 +++++-- 3 files changed, 41 insertions(+), 274 deletions(-) delete mode 100644 high_quality_transit_areas/check1_downloads.ipynb delete mode 100644 high_quality_transit_areas/check2_hq_corridors.ipynb rename high_quality_transit_areas/{check3_hqta_points.ipynb => check_exports.ipynb} (81%) diff --git a/high_quality_transit_areas/check1_downloads.ipynb b/high_quality_transit_areas/check1_downloads.ipynb deleted file mode 100644 index b12fc7687..000000000 --- a/high_quality_transit_areas/check1_downloads.ipynb +++ /dev/null @@ -1,107 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "08f82968-a8b5-42c3-919a-f8f2028b9c8a", - "metadata": {}, - "source": [ - "# Check: initial downloads\n", - "\n", - "Make maps to see that rail/ferry/brt all show up correctly." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ecdd335a-be94-4a11-aaca-24a43a3b9756", - "metadata": {}, - "outputs": [], - "source": [ - "import geopandas as gpd\n", - "import pandas as pd\n", - "\n", - "from IPython.display import Markdown\n", - "\n", - "from segment_speed_utils import helpers\n", - "from update_vars import analysis_date, GCS_FILE_PATH\n", - "\n", - "# Map arguments\n", - "TILES = \"Carto DB Positron\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "573004b2-659d-4930-97b9-3f333a7ab8d8", - "metadata": {}, - "outputs": [], - "source": [ - "def make_map(gdf, plot_col):\n", - " date_cols = [c for c in gdf.columns if \n", - " gdf[c].dtype == 'datetime64[ns]']\n", - "\n", - " gdf = gdf.drop(columns = date_cols)\n", - " \n", - " m = gdf.explore(plot_col, categorical = True, tiles = TILES)\n", - " \n", - " display(m)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5351b749-106b-4b4b-aa67-aa76bad06ca2", - "metadata": {}, - "outputs": [], - "source": [ - "stops = gpd.read_parquet(\n", - " f\"{GCS_FILE_PATH}rail_brt_ferry.parquet\"\n", - ")\n", - "\n", - "hqta_types = list(stops.hqta_type.unique())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72b13f4a-313b-45fe-b420-3bb656c2fd25", - "metadata": {}, - "outputs": [], - "source": [ - "for i in hqta_types:\n", - " display(Markdown(f\"### HQTA Type: {i}\"))\n", - " \n", - " make_map(stops[stops.hqta_type==i], \"route_id\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5dc0b6aa-4e52-4c56-9d44-a9818b2890b2", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/high_quality_transit_areas/check2_hq_corridors.ipynb b/high_quality_transit_areas/check2_hq_corridors.ipynb deleted file mode 100644 index eba631e67..000000000 --- a/high_quality_transit_areas/check2_hq_corridors.ipynb +++ /dev/null @@ -1,150 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "08f82968-a8b5-42c3-919a-f8f2028b9c8a", - "metadata": {}, - "source": [ - "# Check: HQ corridors" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ecdd335a-be94-4a11-aaca-24a43a3b9756", - "metadata": {}, - "outputs": [], - "source": [ - "import geopandas as gpd\n", - "import pandas as pd\n", - "\n", - "from shared_utils import rt_dates\n", - "from update_vars import analysis_date" - ] - }, - { - "cell_type": "markdown", - "id": "dc9684e7-ebc1-48a8-8360-40682a1a1c14", - "metadata": {}, - "source": [ - "### After `C3_create_bus_hqta_types`\n", - "\n", - "Check part of the compiling and assembly of polygons." 
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4fa137db-08d5-4822-9bdd-46919ee0da7f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import C1_prep_pairwise_intersections as prep_clip\n",
-    "import D2_assemble_hqta_polygons as D2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1e383a39-3810-41f6-a366-985126b335db",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "bus_hq_corr = prep_clip.prep_bus_corridors(is_hq_corr=True)\n",
-    "\n",
-    "corridors = D2.get_dissolved_hq_corridor_bus(bus_hq_corr, \n",
-    "                                             analysis_date)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "691394ca-b7cc-4f81-8a96-2c167748e240",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "COUNTY_URL = \"https://opendata.arcgis.com/datasets/8713ced9b78a4abb97dc130a691a8695_0.geojson\"\n",
-    "\n",
-    "EPSG_CODE = corridors.crs.to_epsg()\n",
-    "counties = gpd.read_file(COUNTY_URL).to_crs(f\"EPSG: {EPSG_CODE}\")\n",
-    "\n",
-    "bay_area_counties = [\n",
-    "    \"Alameda\", \"Contra Costa\", \n",
-    "    \"Marin\", \"Napa\", \n",
-    "    \"San Francisco\", \"San Mateo\", \"Santa Clara\", \n",
-    "    \"Solano\", \"Sonoma\"\n",
-    "]\n",
-    "\n",
-    "hqta_in_bay = gpd.sjoin(\n",
-    "    corridors,\n",
-    "    counties[counties.COUNTY_NAME.isin(bay_area_counties)][\n",
-    "        [\"COUNTY_NAME\", \"geometry\"]],\n",
-    "    how = \"inner\",\n",
-    "    predicate=\"intersects\"\n",
-    ").drop(columns=\"index_right\")\n",
-    "\n",
-    "hqta_in_la = gpd.sjoin(\n",
-    "    corridors,\n",
-    "    counties[counties.COUNTY_NAME == \"Los Angeles\"][\n",
-    "        [\"COUNTY_NAME\", \"geometry\"]],\n",
-    "    how = \"inner\",\n",
-    "    predicate=\"intersects\"\n",
-    ").drop(columns=\"index_right\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c6368928-8f1a-4c08-b826-96115e85eb4f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Bay Area\n",
-    "TILES = \"CartoDB Positron\"\n",
-    "hqta_in_bay.explore(\"feed_key_primary\", categorical=True, \n",
-    "                    tiles = TILES, legend=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "549e7974-fa20-4cce-b02a-7984192483b4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# LA\n",
-    "hqta_in_la.explore(\"feed_key_primary\", categorical=True, \n",
-    "                   tiles = TILES, legend=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "fdd4f490-df75-4433-898a-4479f5cb62b2",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.13"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/high_quality_transit_areas/check3_hqta_points.ipynb b/high_quality_transit_areas/check_exports.ipynb
similarity index 81%
rename from high_quality_transit_areas/check3_hqta_points.ipynb
rename to high_quality_transit_areas/check_exports.ipynb
index 7869c49f4..efb28e05f 100644
--- a/high_quality_transit_areas/check3_hqta_points.ipynb
+++ b/high_quality_transit_areas/check_exports.ipynb
@@ -5,13 +5,7 @@
    "id": "cec8ccdd-5225-4814-b59a-a8d398062e35",
    "metadata": {},
    "source": [
-    "# Check HQTA points / polygons\n",
-    "\n",
-    "## Dropping bad stops\n",
-    "\n",
-    "* Be more stringent about what `stop_id` to drop, since the same `stop_id` can be shared across operators. Also add in which operator.\n",
-    "\n",
-    "### Done in `D2_assemble_hqta_polygons`, but should also be added to `D1_assemble_hqta_points`\n"
+    "# Check HQTA points / polygons"
    ]
   },
   {
@@ -32,29 +26,57 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "1c880d7b-ebb3-46c7-98f1-d724bdd802cb",
+   "id": "ad15e88d-da20-435c-b3c0-34df96ff75bf",
    "metadata": {},
    "outputs": [],
    "source": [
-    "gdf = gpd.read_parquet(f\"{GCS_FILE_PATH}hqta_points.parquet\")"
+    "def make_map(gdf, plot_col):\n",
+    "    date_cols = [c for c in gdf.columns if \n",
+    "                 gdf[c].dtype == 'datetime64[ns]']\n",
+    "    \n",
+    "    m = gdf.drop(columns = date_cols).explore(\n",
+    "        plot_col, \n",
+    "        categorical=True, \n",
+    "        tiles = \"CartoDB Positron\", \n",
+    "        legend=True\n",
+    "    )\n",
+    "    \n",
+    "    display(m)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "927b1e97-e7d6-4f46-9550-5c3f83ac08a8",
+   "metadata": {},
+   "source": [
+    "## Rail / BRT / Ferry stops"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ee3dad2e-935a-42b8-92af-7e7fae3d9248",
+   "id": "dcd7f8df-93bd-4702-a29e-ddd9211de08f",
    "metadata": {},
    "outputs": [],
    "source": [
-    "TILES = \"CartoDB Positron\"\n",
+    "stops = gpd.read_parquet(\n",
+    "    f\"{GCS_FILE_PATH}rail_brt_ferry.parquet\"\n",
+    ")\n",
     "\n",
-    "def make_map(gdf, plot_col):\n",
-    "    if \"service_date\" in gdf.columns:\n",
-    "        gdf = gdf.drop(columns = \"service_date\")\n",
-    "    \n",
-    "    m = gdf.explore(plot_col, categorical=True, tiles = TILES, legend=True)\n",
+    "hqta_types = list(stops.hqta_type.unique())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "01ad05f9-bcb3-4b06-8911-2ab3669b5561",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for i in hqta_types:\n",
+    "    display(Markdown(f\"### HQTA Type: {i}\"))\n",
     "    \n",
-    "    display(m)"
+    "    make_map(stops[stops.hqta_type==i], \"route_id\")"
    ]
   },
@@ -95,6 +117,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "gdf = gpd.read_parquet(f\"{GCS_FILE_PATH}hqta_points.parquet\")\n",
+    "\n",
     "check_for_missing(gdf) "
    ]
   },

From e382995dc78f54fc8f80b1c5f1f3b309955bf1bd Mon Sep 17 00:00:00 2001
From: tiffanychu90
Date: Wed, 25 Sep 2024 17:45:52 +0000
Subject: [PATCH 11/11] notebooks use numbered ordering, add _utils

---
 ...ines.ipynb => 01_corridors-as-lines.ipynb} |  0
 ..._SACOG.ipynb => 02_hqta_green_SACOG.ipynb} |  0
 .../{hqta-map.ipynb => 03_hqta_map.ipynb}     |  0
 ...ittier.ipynb => 04_explore_whittier.ipynb} |  0
 .../{metro_brt.ipynb => 05_metro_brt.ipynb}   |  0
 .../{muni_brt.ipynb => 06_muni_brt.ipynb}     |  0
 .../{amtrak.ipynb => 07_amtrak.ipynb}         |  0
 high_quality_transit_areas/_utils.py          | 43 +++++++++++++++++++
 8 files changed, 43 insertions(+)
 rename high_quality_transit_areas/{corridors-as-lines.ipynb => 01_corridors-as-lines.ipynb} (100%)
 rename high_quality_transit_areas/{hqta_green_SACOG.ipynb => 02_hqta_green_SACOG.ipynb} (100%)
 rename high_quality_transit_areas/{hqta-map.ipynb => 03_hqta_map.ipynb} (100%)
 rename high_quality_transit_areas/{explore-whittier.ipynb => 04_explore_whittier.ipynb} (100%)
 rename high_quality_transit_areas/{metro_brt.ipynb => 05_metro_brt.ipynb} (100%)
 rename high_quality_transit_areas/{muni_brt.ipynb => 06_muni_brt.ipynb} (100%)
 rename high_quality_transit_areas/{amtrak.ipynb => 07_amtrak.ipynb} (100%)
 create mode 100644 high_quality_transit_areas/_utils.py

diff --git a/high_quality_transit_areas/corridors-as-lines.ipynb b/high_quality_transit_areas/01_corridors-as-lines.ipynb
similarity index 100%
rename from high_quality_transit_areas/corridors-as-lines.ipynb
rename to high_quality_transit_areas/01_corridors-as-lines.ipynb
diff --git a/high_quality_transit_areas/hqta_green_SACOG.ipynb b/high_quality_transit_areas/02_hqta_green_SACOG.ipynb
similarity index 100%
rename from high_quality_transit_areas/hqta_green_SACOG.ipynb
rename to high_quality_transit_areas/02_hqta_green_SACOG.ipynb
diff --git a/high_quality_transit_areas/hqta-map.ipynb b/high_quality_transit_areas/03_hqta_map.ipynb
similarity index 100%
rename from high_quality_transit_areas/hqta-map.ipynb
rename to high_quality_transit_areas/03_hqta_map.ipynb
diff --git a/high_quality_transit_areas/explore-whittier.ipynb b/high_quality_transit_areas/04_explore_whittier.ipynb
similarity index 100%
rename from high_quality_transit_areas/explore-whittier.ipynb
rename to high_quality_transit_areas/04_explore_whittier.ipynb
diff --git a/high_quality_transit_areas/metro_brt.ipynb b/high_quality_transit_areas/05_metro_brt.ipynb
similarity index 100%
rename from high_quality_transit_areas/metro_brt.ipynb
rename to high_quality_transit_areas/05_metro_brt.ipynb
diff --git a/high_quality_transit_areas/muni_brt.ipynb b/high_quality_transit_areas/06_muni_brt.ipynb
similarity index 100%
rename from high_quality_transit_areas/muni_brt.ipynb
rename to high_quality_transit_areas/06_muni_brt.ipynb
diff --git a/high_quality_transit_areas/amtrak.ipynb b/high_quality_transit_areas/07_amtrak.ipynb
similarity index 100%
rename from high_quality_transit_areas/amtrak.ipynb
rename to high_quality_transit_areas/07_amtrak.ipynb
diff --git a/high_quality_transit_areas/_utils.py b/high_quality_transit_areas/_utils.py
new file mode 100644
index 000000000..fd42638bf
--- /dev/null
+++ b/high_quality_transit_areas/_utils.py
@@ -0,0 +1,43 @@
+"""
+Shared utility functions for HQTA
+"""
+import geopandas as gpd
+import intake
+import pandas as pd
+
+catalog = intake.open_catalog("catalog.yml")
+
+def add_hqta_details(row) -> str:
+    """
+    Add HQTA details of why nulls are present
+    based on feedback from open data users.
+    """
+    if row.hqta_type == "major_stop_bus":
+        if row.schedule_gtfs_dataset_key_primary != row.schedule_gtfs_dataset_key_secondary:
+            return "intersection_2_bus_routes_different_operators"
+        else:
+            return "intersection_2_bus_routes_same_operator"
+
+    elif row.hqta_type == "hq_corridor_bus":
+        if row.peak_trips >= 4:
+            return "corridor_frequent_stop"
+        else:
+            return "corridor_other_stop"
+
+    elif row.hqta_type in ["major_stop_ferry",
+                           "major_stop_brt", "major_stop_rail"]:
+        return row.hqta_type + "_single_operator"
+
+def primary_rename(df: pd.DataFrame) -> pd.DataFrame:
+    return df.rename(
+        columns = {"schedule_gtfs_dataset_key": "schedule_gtfs_dataset_key_primary"})
+
+def clip_to_ca(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
+    """
+    Clip to CA boundaries.
+    """
+    ca = catalog.ca_boundary.read().to_crs(gdf.crs)
+
+    gdf2 = gdf.clip(ca, keep_geom_type = False).reset_index(drop=True)
+
+    return gdf2
\ No newline at end of file
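
Reviewer note on the new `_utils.py`: the three helpers are meant to be chained on an HQTA layer before export. Below is a minimal usage sketch; the parquet path is hypothetical, `clip_to_ca` assumes a readable catalog.yml with a ca_boundary entry in the working directory, and `add_hqta_details` implicitly returns None for any hqta_type outside the five values it handles.

import geopandas as gpd

from _utils import add_hqta_details, clip_to_ca, primary_rename

# Hypothetical input: any HQTA layer carrying hqta_type, peak_trips,
# and the schedule_gtfs_dataset_key columns the helpers expect.
gdf = gpd.read_parquet("hqta_points_sample.parquet")

# schedule_gtfs_dataset_key -> schedule_gtfs_dataset_key_primary
gdf = primary_rename(gdf)

# Row-wise label explaining why some fields are null downstream;
# rows with an unhandled hqta_type come back as None.
gdf = gdf.assign(hqta_details=gdf.apply(add_hqta_details, axis=1))

# Clip to the CA boundary read from catalog.yml (ca_boundary entry).
gdf = clip_to_ca(gdf)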