Skip to content

Commit

Permalink
Merge pull request #1284 from cal-itp/sr99-conveyal
Browse files Browse the repository at this point in the history
fix scripts, one-off conveyal export for sr99 cmcp
  • Loading branch information
edasmalchi authored Nov 8, 2024
2 parents 0c9b600 + a9cb025 commit d8f8043
Show file tree
Hide file tree
Showing 5 changed files with 571 additions and 12 deletions.
13 changes: 8 additions & 5 deletions conveyal_update/conveyal_vars.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
import datetime as dt
from shared_utils import rt_dates

GCS_PATH = 'gs://calitp-analytics-data/data-analyses/conveyal_update/'
TARGET_DATE = dt.date(2022, 9, 21) # 2022 date for SCAG request
TARGET_DATE = rt_dates.DATES['oct2024']
OSM_FILE = 'us-west-latest.osm.pbf'
# http://download.geofabrik.de/north-america/us-west-latest.osm.pbf
# first download with wget...

conveyal_regions = {}
# boundaries correspond to Conveyal Analysis regions
conveyal_regions['norcal'] = {'north': 42.03909, 'south': 39.07038, 'east': -119.60541, 'west': -124.49158}
conveyal_regions['central'] = {'north': 39.64165, 'south': 35.87347, 'east': -117.53174, 'west': -123.83789}
conveyal_regions['socal'] = {'north': 35.8935, 'south': 32.5005, 'east': -114.13121, 'west': -121.46759}
conveyal_regions['mojave'] = {'north': 37.81629, 'south': 34.89945, 'east': -114.59015, 'west': -118.38043}
# conveyal_regions['norcal'] = {'north': 42.03909, 'south': 39.07038, 'east': -119.60541, 'west': -124.49158}
# conveyal_regions['central'] = {'north': 39.64165, 'south': 35.87347, 'east': -117.53174, 'west': -123.83789}
# conveyal_regions['socal'] = {'north': 35.8935, 'south': 32.5005, 'east': -114.13121, 'west': -121.46759}
# conveyal_regions['mojave'] = {'north': 37.81629, 'south': 34.89945, 'east': -114.59015, 'west': -118.38043}
# special region for one-off SR99 CMCP
conveyal_regions['sr99'] = {'north': 38.71337, 'south': 34.81154, 'east': -118.66882, 'west': -121.66259}

# # special region for one-off Centennial Corridor
# conveyal_regions['bakersfield'] = {'north': 36.81, 'south': 34.13, 'east': -117.12, 'west': -120.65}
2 changes: 1 addition & 1 deletion conveyal_update/download_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
regions = conveyal_vars.conveyal_regions
TARGET_DATE = conveyal_vars.TARGET_DATE

regions_and_feeds = pd.read_parquet(f'{conveyal_vars.GCS_PATH}regions_feeds_{TARGET_DATE.isoformat()}.parquet')
regions_and_feeds = pd.read_parquet(f'{conveyal_vars.GCS_PATH}regions_feeds_{TARGET_DATE}.parquet')

def download_feed(row):
# need wildcard for file too -- not all are gtfs.zip!
Expand Down
7 changes: 4 additions & 3 deletions conveyal_update/evaluate_feeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def check_defined_elsewhere(row, df):

def get_feeds_check_service():
feeds_on_target = gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(selected_date=TARGET_DATE)
feeds_on_target = feeds_on_target.rename(columns={'name':'gtfs_dataset_name'})
# defaults to MTC subfeeds (previous Conveyal behavior); can specify customer-facing feeds instead if we want to switch

operator_feeds = feeds_on_target.feed_key
Expand All @@ -38,7 +39,7 @@ def get_feeds_check_service():

def attach_transit_services(feeds_on_target: pd.DataFrame):

target_dt = dt.datetime.combine(TARGET_DATE, dt.time(0))
target_dt = dt.datetime.combine(dt.date.fromisoformat(TARGET_DATE), dt.time(0))

services = (tbls.mart_transit_database.dim_gtfs_service_data()
>> filter(_._valid_from <= target_dt, _._valid_to > target_dt)
Expand All @@ -60,7 +61,7 @@ def report_undefined(feeds_on_target: pd.DataFrame):
print(undefined.columns)
print('these feeds have no service defined on target date, nor are their services captured in other feeds:')
# gtfs_dataset_name is restored by renaming 'name' in get_feeds_check_service; this whole script should probably still be updated/replaced
# print(undefined >> select(_.gtfs_dataset_name, _.service_any_feed))
print(undefined >> select(_.gtfs_dataset_name, _.service_any_feed))
print(f'saving detailed csv to {fname}')
undefined.to_csv(fname)
return
Expand All @@ -71,5 +72,5 @@ def report_undefined(feeds_on_target: pd.DataFrame):
feeds_on_target = attach_transit_services(feeds_on_target)
print(f'feeds on target date shape: {feeds_on_target.shape}')
report_undefined(feeds_on_target)
feeds_on_target.to_parquet(f'{conveyal_vars.GCS_PATH}feeds_{TARGET_DATE.isoformat()}.parquet')
feeds_on_target.to_parquet(f'{conveyal_vars.GCS_PATH}feeds_{TARGET_DATE}.parquet')

4 changes: 2 additions & 2 deletions conveyal_update/match_feeds_regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

regions = conveyal_vars.conveyal_regions
TARGET_DATE = conveyal_vars.TARGET_DATE
feeds_on_target = pd.read_parquet(f'{conveyal_vars.GCS_PATH}feeds_{TARGET_DATE.isoformat()}.parquet')
feeds_on_target = pd.read_parquet(f'{conveyal_vars.GCS_PATH}feeds_{TARGET_DATE}.parquet')

def create_region_gdf():
# https://shapely.readthedocs.io/en/stable/reference/shapely.box.html#shapely.box
Expand All @@ -38,4 +38,4 @@ def join_stops_regions(region_gdf: gpd.GeoDataFrame, feeds_on_target: pd.DataFra
regions_and_feeds = join_stops_regions(region_gdf, feeds_on_target)
regions_and_feeds = regions_and_feeds >> inner_join(_, feeds_on_target >> select(_.feed_key, _.gtfs_dataset_name, _.base64_url,
_.date), on = 'feed_key')
regions_and_feeds.to_parquet(f'{conveyal_vars.GCS_PATH}regions_feeds_{TARGET_DATE.isoformat()}.parquet')
regions_and_feeds.to_parquet(f'{conveyal_vars.GCS_PATH}regions_feeds_{TARGET_DATE}.parquet')
Loading

0 comments on commit d8f8043

Please sign in to comment.