diff --git a/conveyal_update/conveyal_vars.py b/conveyal_update/conveyal_vars.py index 2ebfc8280..c7996961d 100644 --- a/conveyal_update/conveyal_vars.py +++ b/conveyal_update/conveyal_vars.py @@ -1,17 +1,20 @@ import datetime as dt +from shared_utils import rt_dates GCS_PATH = 'gs://calitp-analytics-data/data-analyses/conveyal_update/' -TARGET_DATE = dt.date(2022, 9, 21) # 2022 date for SCAG request +TARGET_DATE = rt_dates.DATES['oct2024'] OSM_FILE = 'us-west-latest.osm.pbf' # http://download.geofabrik.de/north-america/us-west-latest.osm.pbf # first download with wget... conveyal_regions = {} # boundaries correspond to Conveyal Analysis regions -conveyal_regions['norcal'] = {'north': 42.03909, 'south': 39.07038, 'east': -119.60541, 'west': -124.49158} -conveyal_regions['central'] = {'north': 39.64165, 'south': 35.87347, 'east': -117.53174, 'west': -123.83789} -conveyal_regions['socal'] = {'north': 35.8935, 'south': 32.5005, 'east': -114.13121, 'west': -121.46759} -conveyal_regions['mojave'] = {'north': 37.81629, 'south': 34.89945, 'east': -114.59015, 'west': -118.38043} +# conveyal_regions['norcal'] = {'north': 42.03909, 'south': 39.07038, 'east': -119.60541, 'west': -124.49158} +# conveyal_regions['central'] = {'north': 39.64165, 'south': 35.87347, 'east': -117.53174, 'west': -123.83789} +# conveyal_regions['socal'] = {'north': 35.8935, 'south': 32.5005, 'east': -114.13121, 'west': -121.46759} +# conveyal_regions['mojave'] = {'north': 37.81629, 'south': 34.89945, 'east': -114.59015, 'west': -118.38043} +# special region for one-off SR99 CMCP +conveyal_regions['sr99'] = {'north': 38.71337, 'south': 34.81154, 'east': -118.66882, 'west': -121.66259} # # special region for one-off Centennial Corridor # conveyal_regions['bakersfield'] = {'north': 36.81, 'south': 34.13, 'east': -117.12, 'west': -120.65} \ No newline at end of file diff --git a/conveyal_update/download_data.py b/conveyal_update/download_data.py index 74d6b2d00..3dcdf0f84 100644 --- a/conveyal_update/download_data.py +++ b/conveyal_update/download_data.py @@ -14,7 +14,7 @@ regions = conveyal_vars.conveyal_regions TARGET_DATE = conveyal_vars.TARGET_DATE -regions_and_feeds = pd.read_parquet(f'{conveyal_vars.GCS_PATH}regions_feeds_{TARGET_DATE.isoformat()}.parquet') +regions_and_feeds = pd.read_parquet(f'{conveyal_vars.GCS_PATH}regions_feeds_{TARGET_DATE}.parquet') def download_feed(row): # need wildcard for file too -- not all are gtfs.zip! diff --git a/conveyal_update/evaluate_feeds.py b/conveyal_update/evaluate_feeds.py index d11d9e8e4..3e1fe3ae7 100644 --- a/conveyal_update/evaluate_feeds.py +++ b/conveyal_update/evaluate_feeds.py @@ -21,6 +21,7 @@ def check_defined_elsewhere(row, df): def get_feeds_check_service(): feeds_on_target = gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(selected_date=TARGET_DATE) + feeds_on_target = feeds_on_target.rename(columns={'name':'gtfs_dataset_name'}) # default will use mtc subfeeds (prev Conveyal behavior), can spec customer facing if we wanna switch operator_feeds = feeds_on_target.feed_key @@ -38,7 +39,7 @@ def get_feeds_check_service(): def attach_transit_services(feeds_on_target: pd.DataFrame): - target_dt = dt.datetime.combine(TARGET_DATE, dt.time(0)) + target_dt = dt.datetime.combine(dt.date.fromisoformat(TARGET_DATE), dt.time(0)) services = (tbls.mart_transit_database.dim_gtfs_service_data() >> filter(_._valid_from <= target_dt, _._valid_to > target_dt) @@ -60,7 +61,7 @@ def report_undefined(feeds_on_target: pd.DataFrame): print(undefined.columns) print('these feeds have no service defined on target date, nor are their services captured in other feeds:') # gtfs_dataset_name no longer present, this whole script should probably be updated/replaced - # print(undefined >> select(_.gtfs_dataset_name, _.service_any_feed)) + print(undefined >> select(_.gtfs_dataset_name, _.service_any_feed)) print(f'saving detailed csv to {fname}') undefined.to_csv(fname) return @@ -71,5 +72,5 @@ def report_undefined(feeds_on_target: pd.DataFrame): feeds_on_target = attach_transit_services(feeds_on_target) print(f'feeds on target date shape: {feeds_on_target.shape}') report_undefined(feeds_on_target) - feeds_on_target.to_parquet(f'{conveyal_vars.GCS_PATH}feeds_{TARGET_DATE.isoformat()}.parquet') + feeds_on_target.to_parquet(f'{conveyal_vars.GCS_PATH}feeds_{TARGET_DATE}.parquet') \ No newline at end of file diff --git a/conveyal_update/match_feeds_regions.py b/conveyal_update/match_feeds_regions.py index 9008e311f..7f980437c 100644 --- a/conveyal_update/match_feeds_regions.py +++ b/conveyal_update/match_feeds_regions.py @@ -13,7 +13,7 @@ regions = conveyal_vars.conveyal_regions TARGET_DATE = conveyal_vars.TARGET_DATE -feeds_on_target = pd.read_parquet(f'{conveyal_vars.GCS_PATH}feeds_{TARGET_DATE.isoformat()}.parquet') +feeds_on_target = pd.read_parquet(f'{conveyal_vars.GCS_PATH}feeds_{TARGET_DATE}.parquet') def create_region_gdf(): # https://shapely.readthedocs.io/en/stable/reference/shapely.box.html#shapely.box @@ -38,4 +38,4 @@ def join_stops_regions(region_gdf: gpd.GeoDataFrame, feeds_on_target: pd.DataFra regions_and_feeds = join_stops_regions(region_gdf, feeds_on_target) regions_and_feeds = regions_and_feeds >> inner_join(_, feeds_on_target >> select(_.feed_key, _.gtfs_dataset_name, _.base64_url, _.date), on = 'feed_key') - regions_and_feeds.to_parquet(f'{conveyal_vars.GCS_PATH}regions_feeds_{TARGET_DATE.isoformat()}.parquet') \ No newline at end of file + regions_and_feeds.to_parquet(f'{conveyal_vars.GCS_PATH}regions_feeds_{TARGET_DATE}.parquet') \ No newline at end of file diff --git a/conveyal_update/scag_2022_export.ipynb b/conveyal_update/scag_2022_export.ipynb index 0f4194e40..a619e332b 100644 --- a/conveyal_update/scag_2022_export.ipynb +++ b/conveyal_update/scag_2022_export.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 9, + "execution_count": 1, "id": "2f0079e6-ac0e-4528-9970-2f3f4c453caa", "metadata": { "tags": [] @@ -17,6 +17,561 @@ "import json" ] }, + { + "cell_type": "code", + "execution_count": 2, + "id": "12238eac-ada9-4b82-9db7-52ed02112166", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", + "from shared_utils import gtfs_utils_v2\n", + "\n", + "from calitp_data_analysis.tables import tbls" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b865f3c0-5f0f-4fe6-ae6d-b72241593ab1", + "metadata": {}, + "outputs": [], + "source": [ + "import conveyal_vars\n", + "import evaluate_feeds" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ebb475b1-5155-484b-9c5c-dab9a67b2ddb", + "metadata": {}, + "outputs": [], + "source": [ + "df = evaluate_feeds.get_feeds_check_service()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c170ea11-dd08-4d27-b80a-64309ffcd3cf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | key | \n", + "date | \n", + "feed_key | \n", + "feed_timezone | \n", + "base64_url | \n", + "gtfs_dataset_key | \n", + "type | \n", + "regional_feed_type | \n", + "n | \n", + "service_key | \n", + "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "4125ea47789c59d089ac1d5ead4d7794 | \n", + "2024-10-16 | \n", + "696beb3cb2e375f8524ae18eff0d041d | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... | \n", + "011e7b627d3c66bdff6d132359fe893d | \n", + "schedule | \n", + "None | \n", + "NaN | \n", + "9bf27849403d9fd205ad3bdab95eee75 | \n", + "
1 | \n", + "c10c3edb0f0b91efbafe23d66eea33d2 | \n", + "2024-10-16 | \n", + "511d593426d1fd821037dfd47d2e3cc8 | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly90Y3J0YS50cmlwc2hvdC5jb20vdjEvZ3Rmcy... | \n", + "0139b1253130b33adcd4b3a4490530d2 | \n", + "schedule | \n", + "None | \n", + "310.0 | \n", + "a904344b7befc937a6bec6ac9f4dc95e | \n", + "
2 | \n", + "0a052cef2795327217a094d7dcab8941 | \n", + "2024-10-16 | \n", + "8685c49ef1273fe958478531f2c6a781 | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW... | \n", + "015d67d5b75b5cf2b710bbadadfb75f5 | \n", + "schedule | \n", + "Regional Subfeed | \n", + "642.0 | \n", + "b084c88a8a29daaf8e60efd2c5f80e4f | \n", + "
3 | \n", + "a01bb4a8d9ac98d49c8c5e92aa0ed930 | \n", + "2024-10-16 | \n", + "705807dec27e43e0d93e9a7e966068d8 | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly93d3cuYXVidXJuLmNhLmdvdi9Eb2N1bWVudE... | \n", + "020467a276c12a9fe4b0a2332e393f2c | \n", + "schedule | \n", + "None | \n", + "8.0 | \n", + "434367bd6da9dfb848dd4049784a5f5a | \n", + "
4 | \n", + "502985287f0329886c92cde5f2f285b2 | \n", + "2024-10-16 | \n", + "0110e66b44a8f07126af415097613bb6 | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX2J1cy... | \n", + "0666caf3ec1ecc96b74f4477ee4bc939 | \n", + "schedule | \n", + "None | \n", + "13658.0 | \n", + "4c0e57d94eb0c9d03fda80fb6de7a271 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
240 | \n", + "4e4b4362ff1df49de57ad784a6b45a1a | \n", + "2024-10-16 | \n", + "d53527fad752767624caca4b773f26ea | \n", + "America/Anchorage | \n", + "aHR0cDovL3d3dy5zYW5qdWFuY2FwaXN0cmFuby5yaWRlc3... | \n", + "fbea6a9ec53590f709e42e6aa5d0a79e | \n", + "schedule | \n", + "None | \n", + "18.0 | \n", + "89a211c66cb55e774d37682c236f8ce7 | \n", + "
241 | \n", + "381626c4ed3c74419d65927d585ee441 | \n", + "2024-10-16 | \n", + "590892713ceb1d0389dec7a4c7e0e25b | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly9wYXNzaW8zLmNvbS9tb250ZXJleXAvcGFzc2... | \n", + "fc6cd27871cce0092a08ccf68fb240a2 | \n", + "schedule | \n", + "None | \n", + "99.0 | \n", + "efb87f21092613134b308705dac0151c | \n", + "
242 | \n", + "8c6823bbc7b380bb1ad362d8a52f7159 | \n", + "2024-10-16 | \n", + "27bc8cf0848f3636c1047194318a92df | \n", + "America/Los_Angeles | \n", + "aHR0cDovL3NjaGVkdWxlLml2dHJhbnNpdC5jb20vcHVibG... | \n", + "fe4aab1717eca5a2935c32c85a35a5bf | \n", + "schedule | \n", + "None | \n", + "134.0 | \n", + "040e72a4d2f1afe5070e62e31d2e6647 | \n", + "
243 | \n", + "dc9b668adf96f067d0c017061ab463fc | \n", + "2024-10-16 | \n", + "638d90ba5ade8b25240024e6cae907e9 | \n", + "America/Los_Angeles | \n", + "aHR0cDovL2FwcC5tZWNhdHJhbi5jb20vdXJiL3dzL2ZlZW... | \n", + "ff1bc5dde661d62c877165421e9ca257 | \n", + "schedule | \n", + "None | \n", + "72.0 | \n", + "69a2e7920b0c88926616670f02e2f531 | \n", + "
244 | \n", + "03b9c687984a5c64ec7b484f8ac950e9 | \n", + "2024-10-16 | \n", + "dbcc1dfac53e7f1429b00b67d1dddfc8 | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... | \n", + "ff51495a24cf286ff2f7bc9b3401f855 | \n", + "schedule | \n", + "None | \n", + "NaN | \n", + "ace2b05f5e1621f256d77bdc46b9e5dd | \n", + "
245 rows × 10 columns
\n", + "\n", + " | key | \n", + "date | \n", + "feed_key | \n", + "feed_timezone | \n", + "base64_url | \n", + "gtfs_dataset_key | \n", + "name | \n", + "type | \n", + "regional_feed_type | \n", + "
---|---|---|---|---|---|---|---|---|---|
0 | \n", + "4125ea47789c59d089ac1d5ead4d7794 | \n", + "2024-10-16 | \n", + "696beb3cb2e375f8524ae18eff0d041d | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... | \n", + "011e7b627d3c66bdff6d132359fe893d | \n", + "Eastern Sierra Flex | \n", + "schedule | \n", + "None | \n", + "
1 | \n", + "c10c3edb0f0b91efbafe23d66eea33d2 | \n", + "2024-10-16 | \n", + "511d593426d1fd821037dfd47d2e3cc8 | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly90Y3J0YS50cmlwc2hvdC5jb20vdjEvZ3Rmcy... | \n", + "0139b1253130b33adcd4b3a4490530d2 | \n", + "TCRTA TripShot Schedule | \n", + "schedule | \n", + "None | \n", + "
2 | \n", + "0a052cef2795327217a094d7dcab8941 | \n", + "2024-10-16 | \n", + "8685c49ef1273fe958478531f2c6a781 | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW... | \n", + "015d67d5b75b5cf2b710bbadadfb75f5 | \n", + "Bay Area 511 Marin Schedule | \n", + "schedule | \n", + "Regional Subfeed | \n", + "
3 | \n", + "a01bb4a8d9ac98d49c8c5e92aa0ed930 | \n", + "2024-10-16 | \n", + "705807dec27e43e0d93e9a7e966068d8 | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly93d3cuYXVidXJuLmNhLmdvdi9Eb2N1bWVudE... | \n", + "020467a276c12a9fe4b0a2332e393f2c | \n", + "Auburn Schedule | \n", + "schedule | \n", + "None | \n", + "
6 | \n", + "502985287f0329886c92cde5f2f285b2 | \n", + "2024-10-16 | \n", + "0110e66b44a8f07126af415097613bb6 | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX2J1cy... | \n", + "0666caf3ec1ecc96b74f4477ee4bc939 | \n", + "LA Metro Bus Schedule | \n", + "schedule | \n", + "None | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
252 | \n", + "4e4b4362ff1df49de57ad784a6b45a1a | \n", + "2024-10-16 | \n", + "d53527fad752767624caca4b773f26ea | \n", + "America/Anchorage | \n", + "aHR0cDovL3d3dy5zYW5qdWFuY2FwaXN0cmFuby5yaWRlc3... | \n", + "fbea6a9ec53590f709e42e6aa5d0a79e | \n", + "San Juan Capistrano Trolley Schedule | \n", + "schedule | \n", + "None | \n", + "
253 | \n", + "381626c4ed3c74419d65927d585ee441 | \n", + "2024-10-16 | \n", + "590892713ceb1d0389dec7a4c7e0e25b | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly9wYXNzaW8zLmNvbS9tb250ZXJleXAvcGFzc2... | \n", + "fc6cd27871cce0092a08ccf68fb240a2 | \n", + "Spirit Bus Passio Schedule | \n", + "schedule | \n", + "None | \n", + "
254 | \n", + "8c6823bbc7b380bb1ad362d8a52f7159 | \n", + "2024-10-16 | \n", + "27bc8cf0848f3636c1047194318a92df | \n", + "America/Los_Angeles | \n", + "aHR0cDovL3NjaGVkdWxlLml2dHJhbnNpdC5jb20vcHVibG... | \n", + "fe4aab1717eca5a2935c32c85a35a5bf | \n", + "Imperial Valley Transit Schedule | \n", + "schedule | \n", + "None | \n", + "
255 | \n", + "dc9b668adf96f067d0c017061ab463fc | \n", + "2024-10-16 | \n", + "638d90ba5ade8b25240024e6cae907e9 | \n", + "America/Los_Angeles | \n", + "aHR0cDovL2FwcC5tZWNhdHJhbi5jb20vdXJiL3dzL2ZlZW... | \n", + "ff1bc5dde661d62c877165421e9ca257 | \n", + "Santa Ynez Mecatran Schedule | \n", + "schedule | \n", + "None | \n", + "
256 | \n", + "03b9c687984a5c64ec7b484f8ac950e9 | \n", + "2024-10-16 | \n", + "dbcc1dfac53e7f1429b00b67d1dddfc8 | \n", + "America/Los_Angeles | \n", + "aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... | \n", + "ff51495a24cf286ff2f7bc9b3401f855 | \n", + "Blossom Express Schedule | \n", + "schedule | \n", + "None | \n", + "
219 rows × 9 columns
\n", + "