Skip to content

Commit

Permalink
move time_helpers to shared_utils
Browse files Browse the repository at this point in the history
  • Loading branch information
tiffanychu90 committed Nov 7, 2024
1 parent 00db0f9 commit c527e3a
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 89 deletions.
2 changes: 2 additions & 0 deletions _shared_utils/shared_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
rt_dates,
rt_utils,
schedule_rt_utils,
time_helpers,
)

__all__ = [
Expand All @@ -22,4 +23,5 @@
"rt_dates",
"rt_utils",
"schedule_rt_utils",
"time_helpers",
]
78 changes: 78 additions & 0 deletions _shared_utils/shared_utils/time_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""
Helpers for defining peak vs offpeak periods and
weekdays and weekends so we can aggregate our
existing time-of-day bins.
"""
import datetime

import pandas as pd

# Time-of-day bins that count as "peak"; every other bin is "offpeak".
PEAK_PERIODS = ["AM Peak", "PM Peak"]

# Number of hours covered by each time-of-day bin.
# The trailing comment on each entry is the inclusive hour range.
HOURS_BY_TIME_OF_DAY = {
    "Owl": 4,  # [0, 3]
    "Early AM": 3,  # [4, 6]
    "AM Peak": 3,  # [7, 9]
    "Midday": 5,  # [10, 14]
    "PM Peak": 5,  # [15, 19]
    "Evening": 4,  # [20, 23]
}

# Map each time-of-day bin to "peak" / "offpeak".
# Peak bins are inserted first, then the offpeak bins.
TIME_OF_DAY_DICT = {
    **dict.fromkeys(
        (period for period in HOURS_BY_TIME_OF_DAY if period in PEAK_PERIODS),
        "peak",
    ),
    **dict.fromkeys(
        (period for period in HOURS_BY_TIME_OF_DAY if period not in PEAK_PERIODS),
        "offpeak",
    ),
}

# Map a 1-based day number (1 = Sunday ... 7 = Saturday) to the day name.
DAY_TYPE_DICT = dict(
    enumerate(
        [
            "Sunday",
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
        ],
        start=1,
    )
)

# Map each day name to "weekday" / "weekend".
WEEKDAY_DICT = {
    **dict.fromkeys(
        ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"],
        "weekday",
    ),
    **dict.fromkeys(["Saturday", "Sunday"], "weekend"),
}


def time_span_labeling(date_list: list) -> tuple:
    """
    If we grab a week's worth of trips, we'll
    use this week's average to stand-in for the entire month.
    Label with month and year.

    Args:
        date_list: date strings formatted as "%Y-%m-%d" (e.g. "2024-11-07").

    Returns:
        A 2-tuple (time_span_str, time_span_num).
        If every date falls in the same month, both elements are strings,
        e.g. ("nov2024", "11_2024").
        Otherwise both elements are lists of the distinct labels, in order
        of first appearance in date_list, and the two lists are aligned
        (element i of each list describes the same month).

    Raises:
        ValueError: if a date string does not match "%Y-%m-%d".
    """
    # Parse each date once and keep the string / numeric labels paired.
    # dict.fromkeys de-duplicates while preserving insertion order, so the
    # two output lists always line up (independent sets would not).
    label_pairs = dict.fromkeys(
        (parsed.strftime("%b%Y").lower(), parsed.strftime("%m_%Y").lower())
        for parsed in (datetime.datetime.strptime(d, "%Y-%m-%d") for d in date_list)
    )

    time_span_str = [str_label for str_label, _ in label_pairs]
    time_span_num = [num_label for _, num_label in label_pairs]

    if len(time_span_str) == 1:
        return time_span_str[0], time_span_num[0]

    # Reached for several months, and also for an empty date_list
    # (which returns two empty lists).
    print(f"multiple months: {time_span_str}")
    return time_span_str, time_span_num


def add_time_span_columns(df: pd.DataFrame, time_span_num: str) -> pd.DataFrame:
    """
    Attach `month` and `year` columns to an aggregated time-series table.

    Args:
        df: dataframe to annotate; a new dataframe is returned.
        time_span_num: label formatted as "<month>_<year>", e.g. "11_2024".

    Returns:
        Dataframe with constant `month` and `year` columns stored as int16.
    """
    pieces = time_span_num.split("_")
    month_value, year_value = int(pieces[0]), int(pieces[1])

    labeled = df.assign(month=month_value, year=year_value)

    # Downgrade some dtypes for public bucket
    return labeled.astype({"month": "int16", "year": "int16"})
2 changes: 0 additions & 2 deletions rt_segment_speeds/segment_speed_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
parallel_corridors,
project_vars,
segment_calcs,
time_helpers,
time_series_utils,
vp_transform,
)
Expand All @@ -21,7 +20,6 @@
"parallel_corridors",
"project_vars",
"segment_calcs",
"time_helpers",
"time_series_utils",
"vp_transform",
]
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
"""
All kinds of GTFS schedule table wrangling.
"""
import dask.dataframe as dd
import geopandas as gpd
import pandas as pd
import dask.dataframe as dd

from typing import Literal, Union

from segment_speed_utils import helpers, time_helpers
from shared_utils import portfolio_utils, rt_utils
from segment_speed_utils import helpers
from shared_utils import portfolio_utils, rt_utils, time_helpers
from segment_speed_utils.project_vars import SEGMENT_GCS

sched_rt_category_dict = {
Expand Down
84 changes: 0 additions & 84 deletions rt_segment_speeds/segment_speed_utils/time_helpers.py

This file was deleted.

0 comments on commit c527e3a

Please sign in to comment.