move time_helpers to shared_utils

cal-itp · Nov 7, 2024 · c527e3a · c527e3a
1 parent 00db0f9
commit c527e3a
Show file tree

Hide file tree

Showing 5 changed files with 83 additions and 89 deletions.
diff --git a/_shared_utils/shared_utils/__init__.py b/_shared_utils/shared_utils/__init__.py
@@ -9,6 +9,7 @@
     rt_dates,
     rt_utils,
     schedule_rt_utils,
+    time_helpers,
 )
 
 __all__ = [
@@ -22,4 +23,5 @@
     "rt_dates",
     "rt_utils",
     "schedule_rt_utils",
+    "time_helpers",
 ]
diff --git a/_shared_utils/shared_utils/time_helpers.py b/_shared_utils/shared_utils/time_helpers.py
@@ -0,0 +1,78 @@
+"""
+Helpers for defining peak vs offpeak periods and
+weekday and weekend so we can aggregate our
+existing time-of-day bins.
+"""
+import datetime
+
+import pandas as pd
+
+PEAK_PERIODS = ["AM Peak", "PM Peak"]
+
+HOURS_BY_TIME_OF_DAY = {
+    "Owl": 4,  # [0, 3]
+    "Early AM": 3,  # [4, 6]
+    "AM Peak": 3,  # [7, 9]
+    "Midday": 5,  # [10, 14]
+    "PM Peak": 5,  # [15, 19]
+    "Evening": 4,  # [20, 23]
+}
+
+TIME_OF_DAY_DICT = {
+    **{k: "peak" for k, v in HOURS_BY_TIME_OF_DAY.items() if k in PEAK_PERIODS},
+    **{k: "offpeak" for k, v in HOURS_BY_TIME_OF_DAY.items() if k not in PEAK_PERIODS},
+}
+
+DAY_TYPE_DICT = {
+    1: "Sunday",
+    2: "Monday",
+    3: "Tuesday",
+    4: "Wednesday",
+    5: "Thursday",
+    6: "Friday",
+    7: "Saturday",
+}
+
+WEEKDAY_DICT = {
+    **{k: "weekday" for k in ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]},
+    **{k: "weekend" for k in ["Saturday", "Sunday"]},
+}
+
+
+def time_span_labeling(date_list: list) -> tuple[str]:
+    """
+    If we grab a week's worth of trips, we'll
+    use this week's average to stand in for the entire month.
+    Label with month and year.
+    """
+    time_span_str = list(set([datetime.datetime.strptime(d, "%Y-%m-%d").strftime("%b%Y").lower() for d in date_list]))
+
+    time_span_num = list(set([datetime.datetime.strptime(d, "%Y-%m-%d").strftime("%m_%Y").lower() for d in date_list]))
+
+    if len(time_span_str) == 1:
+        return time_span_str[0], time_span_num[0]
+
+    else:
+        print(f"multiple months: {time_span_str}")
+        return time_span_str, time_span_num
+
+
+def add_time_span_columns(df: pd.DataFrame, time_span_num: str) -> pd.DataFrame:
+    """
+    Add columns for month / year, use when we have aggregated time-series.
+    """
+    month = int(time_span_num.split("_")[0])
+    year = int(time_span_num.split("_")[1])
+
+    # Downgrade some dtypes for public bucket
+    df = df.assign(
+        month=month,
+        year=year,
+    ).astype(
+        {
+            "month": "int16",
+            "year": "int16",
+        }
+    )
+
+    return df
diff --git a/rt_segment_speeds/segment_speed_utils/__init__.py b/rt_segment_speeds/segment_speed_utils/__init__.py
@@ -7,7 +7,6 @@
     parallel_corridors,
     project_vars,
     segment_calcs,
-    time_helpers,
     time_series_utils,
     vp_transform,
 )
@@ -21,7 +20,6 @@
     "parallel_corridors",
     "project_vars",
     "segment_calcs",
-    "time_helpers",
     "time_series_utils",
     "vp_transform",
 ]
diff --git a/rt_segment_speeds/segment_speed_utils/gtfs_schedule_wrangling.py b/rt_segment_speeds/segment_speed_utils/gtfs_schedule_wrangling.py
@@ -1,14 +1,14 @@
 """
 All kinds of GTFS schedule table wrangling.
 """
+import dask.dataframe as dd
 import geopandas as gpd
 import pandas as pd
-import dask.dataframe as dd
 
 from typing import Literal, Union
 
-from segment_speed_utils import helpers, time_helpers
-from shared_utils import portfolio_utils, rt_utils
+from segment_speed_utils import helpers
+from shared_utils import portfolio_utils, rt_utils, time_helpers
 from segment_speed_utils.project_vars import SEGMENT_GCS 
 
 sched_rt_category_dict = {

diff --git a/rt_segment_speeds/segment_speed_utils/time_helpers.py b/rt_segment_speeds/segment_speed_utils/time_helpers.py