From f146795b956e1dc9b8718f2ba3a5711e1ae637cd Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Tue, 1 Oct 2024 00:22:16 +0000 Subject: [PATCH 1/6] timestamp issue when comparing scheduled and rt lags --- gtfs_digest/31_transit_bunching_v2.ipynb | 5089 +++++++++++++++++----- 1 file changed, 4089 insertions(+), 1000 deletions(-) diff --git a/gtfs_digest/31_transit_bunching_v2.ipynb b/gtfs_digest/31_transit_bunching_v2.ipynb index 041c9be3b..5c299b8c5 100644 --- a/gtfs_digest/31_transit_bunching_v2.ipynb +++ b/gtfs_digest/31_transit_bunching_v2.ipynb @@ -340,15 +340,6 @@ "route_dir2[\"headway_minutes\"] = 60 / route_dir.frequency" ] }, - { - "cell_type": "markdown", - "id": "a67f59fe-ae28-4e32-9b63-18514d18ab07", - "metadata": {}, - "source": [ - "#### QUESTION: Should I use mean or median for finding routes that are high frequency?\n", - "* Find Median." - ] - }, { "cell_type": "code", "execution_count": 11, @@ -633,6 +624,17 @@ "trips_freq_routes.shape" ] }, + { + "cell_type": "markdown", + "id": "57ae24e1-c656-4482-b228-31637f245542", + "metadata": { + "tags": [] + }, + "source": [ + "#### Attach route type\n", + "* Figure out the proper way to do this." 
+ ] + }, { "cell_type": "code", "execution_count": 27, @@ -661,7 +663,7 @@ { "cell_type": "code", "execution_count": 28, - "id": "04983f98-885d-402c-9518-a64dc4d3d005", + "id": "8dac05a6-0ba9-472b-85a2-5a0081550efb", "metadata": {}, "outputs": [], "source": [ @@ -671,7 +673,7 @@ { "cell_type": "code", "execution_count": 29, - "id": "318a30b8-03a6-44d9-8dcc-ed05c57ff2db", + "id": "cd13aa3e-f222-49f8-b923-1e9e901f7bfb", "metadata": {}, "outputs": [], "source": [ @@ -684,33 +686,36 @@ { "cell_type": "code", "execution_count": 30, - "id": "6d9b7613-c08d-4603-a7eb-0eca3090ccf9", + "id": "ae72b7fc-ec7b-4dcd-8553-ac2abce5da1d", + "metadata": {}, + "outputs": [], + "source": [ + "high_frequency_routes2 = trips_freq_routes.drop(columns=[\"route_type\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "01a26feb-8d31-4308-9868-47840a7f6591", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(['Bus', 'Rail', 'Subway, Metro', 'Tram, Streetcar, Light rail'],\n", - " dtype=object)" + "Bus 3837\n", + "Tram, Streetcar, Light rail 115\n", + "Rail 98\n", + "Subway, Metro 90\n", + "Name: route_type_str, dtype: int64" ] }, - "execution_count": 30, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "trips_freq_routes.route_type_str.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "01a26feb-8d31-4308-9868-47840a7f6591", - "metadata": {}, - "outputs": [], - "source": [ - "trips_freq_routes = trips_freq_routes.drop(columns=[\"route_type\"])" + "high_frequency_routes2.route_type_str.value_counts()" ] }, { @@ -796,7 +801,7 @@ { "data": { "text/plain": [ - "(118214, 19)" + "(118214, 20)" ] }, "execution_count": 35, @@ -840,158 +845,158 @@ { "cell_type": "code", "execution_count": 37, - "id": "7c59d770-d379-422e-a23d-9140c23df375", + "id": "50bd294b-d96b-4dd8-9890-cb91cd21852c", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - 
"Int64Index: 118214 entries, 0 to 118213\n", - "Data columns (total 19 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 trip_id 118214 non-null object \n", - " 1 stop_id 118214 non-null object \n", - " 2 stop_sequence 118214 non-null int64 \n", - " 3 scheduled_arrival_sec 116687 non-null float64 \n", - " 4 schedule_gtfs_dataset_key 118214 non-null object \n", - " 5 trip_instance_key 118214 non-null object \n", - " 6 rt_arrival_sec 118214 non-null int64 \n", - " 7 route_id 118214 non-null object \n", - " 8 shape_array_key 118214 non-null object \n", - " 9 feed_key 118214 non-null object \n", - " 10 route_long_name 118214 non-null object \n", - " 11 direction_id 118214 non-null float64 \n", - " 12 route_primary_direction 118214 non-null object \n", - " 13 med_headway_minutes 118214 non-null float64 \n", - " 14 organization_name 118214 non-null object \n", - " 15 name 118214 non-null object \n", - " 16 caltrans_district 118214 non-null object \n", - " 17 service_date 118214 non-null datetime64[ns]\n", - " 18 route_type_str 118214 non-null object \n", - "dtypes: datetime64[ns](1), float64(3), int64(2), object(13)\n", - "memory usage: 18.0+ MB\n" - ] + "data": { + "text/plain": [ + "0.9870827482362495" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "rt_stop_times2.info()" + "(len(rt_stop_times2) - rt_stop_times2.scheduled_arrival_sec.isna().sum()) / len(\n", + " rt_stop_times2\n", + ")" ] }, { "cell_type": "code", "execution_count": 38, + "id": "7c59d770-d379-422e-a23d-9140c23df375", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.012917251763750486" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(rt_stop_times2.scheduled_arrival_sec.isna().sum()) / len(rt_stop_times2)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, "id": "bfb52e77-27c2-41d9-ab83-de76497577da", 
"metadata": {}, "outputs": [], "source": [ + "# Add a copy of scheduled arrival sec\n", "rt_stop_times2[\"scheduled_arrival_sec_copy\"] = rt_stop_times2.scheduled_arrival_sec" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 40, "id": "f6bf9598-60b9-4169-9f71-53be00cef8b8", "metadata": {}, "outputs": [], "source": [ - "rt_stop_times3 = rt_stop_times2.loc[rt_stop_times2.scheduled_arrival_sec.notna()].reset_index(drop = True)" + "rt_stop_times3 = rt_stop_times2.loc[\n", + " rt_stop_times2.scheduled_arrival_sec.notna()\n", + "].reset_index(drop=True)" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 41, "id": "70477a11-34b4-45a7-9b1f-35dfd2d68231", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 116687 entries, 0 to 116686\n", - "Data columns (total 20 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 trip_id 116687 non-null object \n", - " 1 stop_id 116687 non-null object \n", - " 2 stop_sequence 116687 non-null int64 \n", - " 3 scheduled_arrival_sec 116687 non-null float64 \n", - " 4 schedule_gtfs_dataset_key 116687 non-null object \n", - " 5 trip_instance_key 116687 non-null object \n", - " 6 rt_arrival_sec 116687 non-null int64 \n", - " 7 route_id 116687 non-null object \n", - " 8 shape_array_key 116687 non-null object \n", - " 9 feed_key 116687 non-null object \n", - " 10 route_long_name 116687 non-null object \n", - " 11 direction_id 116687 non-null float64 \n", - " 12 route_primary_direction 116687 non-null object \n", - " 13 med_headway_minutes 116687 non-null float64 \n", - " 14 organization_name 116687 non-null object \n", - " 15 name 116687 non-null object \n", - " 16 caltrans_district 116687 non-null object \n", - " 17 service_date 116687 non-null datetime64[ns]\n", - " 18 route_type_str 116687 non-null object \n", - " 19 scheduled_arrival_sec_copy 116687 non-null float64 \n", - "dtypes: 
datetime64[ns](1), float64(4), int64(2), object(13)\n", - "memory usage: 17.8+ MB\n" - ] + "data": { + "text/plain": [ + "116687" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "rt_stop_times3.info()" - ] - }, - { - "cell_type": "markdown", - "id": "48737ec7-31be-4743-97ca-c6c000670a13", - "metadata": {}, - "source": [ - "### `rt_stop_times3`: Deal with time\n", - "\n", - "* If 82800 < `scheduled_arrival_time` < 86_400 but `rt_arrival_sec` is lower say 14_000 (4 am in the morning): then perhaps the bus was scheduled to arrive on May 21 (day before the service date) but it arrived a little later on the service date. \n", - "\n", - "* If 86_400 < `scheduled_arrival_time` and `rt_arrival_sec` is around 86_000 then this is the same service date. " + "len(rt_stop_times3)" ] }, { "cell_type": "code", - "execution_count": 41, - "id": "d9e8d6f1-a66e-44a2-a448-5fdb3a8f2da2", + "execution_count": 42, + "id": "ca2e1613-2d2c-40ba-8518-2e3297c13ec8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "82800" + "118214" ] }, - "execution_count": 41, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "86_400 - (60*60)" + "len(rt_stop_times2)" + ] + }, + { + "cell_type": "markdown", + "id": "7560c906-dbbc-46e6-9303-6ed1aaf2e9d2", + "metadata": {}, + "source": [ + "### Sort" ] }, { "cell_type": "code", - "execution_count": 42, - "id": "4485d9ea-0eb1-4b2e-a249-9de75d2a0296", + "execution_count": 43, + "id": "0ef36e93-79ed-4f86-b16a-9d28d90aea1a", "metadata": {}, "outputs": [], "source": [ - "#rt_stop_times3[\"scheduled_arrival_sec_2\"] = (\n", - "# rt_stop_times3[\"scheduled_arrival_sec\"] % 86_400\n", - "#).fillna(0)" + "# Rearrange: I want the stop sequence to be 1,2,3,4.\n", + "# stop ids can differ between trips of the same route and the same stop sequence is the same\n", + "rt_stop_times3 = rt_stop_times3.sort_values(\n", + " by=[\n", + " \"schedule_gtfs_dataset_key\",\n", 
+ " \"route_id\",\n", + " \"shape_array_key\",\n", + " \"direction_id\",\n", + " \"stop_sequence\",\n", + " \"rt_arrival_sec\",\n", + " ]\n", + ").reset_index(drop=True)" + ] + }, + { + "cell_type": "markdown", + "id": "48737ec7-31be-4743-97ca-c6c000670a13", + "metadata": { + "tags": [] + }, + "source": [ + "### `rt_stop_times3`: Deal with time\n", + "\n", + "* If 82800 < `scheduled_arrival_sec` < 86_400 but `rt_arrival_sec` is much lower, say 14_000 (about 4 a.m.), then perhaps the bus was scheduled to arrive on May 21 (the day before the service date) but actually arrived a little later, on the service date. \n", + "\n", + "* If 86_400 < `scheduled_arrival_sec` and `rt_arrival_sec` is around 86_000, then both arrivals fall on the same service date. " ] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 44, "id": "c93dffd6-6fe7-43f0-85fb-061b20d29a74", "metadata": {}, "outputs": [ @@ -1009,7 +1014,7 @@ "Name: scheduled_arrival_sec, dtype: float64" ] }, - "execution_count": 43, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -1020,17 +1025,17 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 45, "id": "97e3dc65-45fc-4318-a5f6-b0f497a6ab04", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(16, 20)" + "(16, 21)" ] }, - "execution_count": 44, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -1041,579 +1046,219 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 46, "id": "15a58951-9ee0-4696-9065-b768737a582d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "82800" + "(2583, 21)" ] }, - "execution_count": 45, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "86_400-3_600" + "rt_stop_times3.loc[rt_stop_times3[\"scheduled_arrival_sec\"] > 86_400].shape" ] }, { - "cell_type": "code", - "execution_count": 46, - "id": "d64637d8-ca0d-4e3c-b956-13d8e64e513c", + "cell_type": "markdown", + "id":
"641ca6ae-b5e4-4871-95de-fd29447f831a", "metadata": {}, - "outputs": [], "source": [ - "def adjust_days_and_time(seconds, date, rt_arrival_sec):\n", - " \"\"\"\n", - " Adjusts days and time based on seconds and rt_arrival_sec.\n", - "\n", - " Parameters:\n", - " seconds (int): Number of seconds.\n", - " date (datetime): Initial date.\n", - " rt_arrival_sec (int): Arrival time in seconds.\n", - "\n", - " Returns:\n", - " datetime: Adjusted date and time.\n", - " \"\"\"\n", - " if rt_arrival_sec < 16_000 and ((86_400) < seconds < 106_000):\n", - " # Subtract a day\n", - " return pd.Timestamp(date + pd.Timedelta(days=-1)) + pd.Timedelta(seconds=seconds % 86400)\n", - " elif (86_000 < rt_arrival_sec < 86_400) and (86_400 < seconds):\n", - " # Add days and remaining seconds\n", - " return pd.Timestamp(date) + pd.Timedelta(seconds=seconds % 86400)\n", - " #elif rt_arrival_sec < (86_400/2) and (86_400 < seconds):\n", - " ## Don't add extra day\n", - " # return pd.Timestamp(date) + pd.Timedelta(seconds=seconds % 86400)\n", - " elif seconds == 86_400:\n", - " # Add one day and reset time\n", - " date2 = pd.Timestamp(date + pd.Timedelta(days=1))\n", - " date_timestamp = date2.replace(hour=0, minute=0, second=0)\n", - " return date_timestamp\n", - " else:\n", - " # No change\n", - " return pd.Timestamp(date) + pd.Timedelta(seconds=seconds)" + "#### Test `scheduled_arrival_sec` rows that exceed 86,400 seconds" ] }, { "cell_type": "code", "execution_count": 47, - "id": "4b07d362-636f-49ed-983f-cb348660034c", + "id": "546d1250-aad7-40b4-8349-7189da606cc2", "metadata": {}, "outputs": [], "source": [ - "rt_stop_times3[\"converted_schd_arrival\"]= rt_stop_times3.apply(lambda row: adjust_days_and_time(row['scheduled_arrival_sec'], row['service_date'], row['rt_arrival_sec']), axis=1)" + "# Convert to midnight anything that goes past the service date\n", + "# rt_stop_times3 = convert_to_midnight(\n", + "# rt_stop_times3, \"scheduled_arrival_sec\", may_date\n", + "# )" ] }, {
"cell_type": "code", "execution_count": 48, - "id": "f68db963-dfec-4861-9831-e008bc4890f0", + "id": "099c6838-aa42-4eb5-a250-0ac3e054f834", "metadata": {}, "outputs": [], "source": [ - "def convert_to_midnight(df, date_column, comparison_date):\n", - " \"\"\"\n", - " Converts timestamps in a DataFrame to midnight if the date exceeds the comparison date.\n", - "\n", - " Args:\n", - " - df (pd.DataFrame): Input DataFrame.\n", - " - date_column (str): Name of the column containing timestamps.\n", - " - comparison_date (str or pd.Timestamp): Date for comparison.\n", - "\n", - " Returns:\n", - " - pd.DataFrame: Modified DataFrame with timestamps converted to midnight.\n", - " \"\"\"\n", - " comparison_date = pd.to_datetime(comparison_date)\n", - "\n", - " # Ensure date_column is datetime type\n", - " df[date_column] = pd.to_datetime(df[date_column])\n", - "\n", - " # Mask dates exceeding comparison_date and replace with midnight\n", - " mask = df[date_column].dt.date > comparison_date.date()\n", - " df.loc[mask, date_column] = df.loc[mask, date_column].dt.normalize()\n", - "\n", - " return df" + "timestamp_subset = [\n", + " \"converted_schd_arrival\",\n", + " \"converted_rt_arrival\",\n", + " \"scheduled_arrival_sec\",\n", + " \"rt_arrival_sec\",\n", + " \"service_date\",\n", + "]" ] }, { "cell_type": "code", "execution_count": 49, - "id": "546d1250-aad7-40b4-8349-7189da606cc2", + "id": "f74e40ac-5a09-4ae2-8ac2-1f1269957330", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "82800" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Convert to midnight anything that goes past the service date\n", - "rt_stop_times3 = convert_to_midnight(\n", - " rt_stop_times3, \"scheduled_arrival_sec\", may_date\n", - ")" + "86_400 - 3600" ] }, { "cell_type": "code", "execution_count": 50, - "id": "5db197c8-e234-4427-8fdd-2a204d258040", + "id": "d64637d8-ca0d-4e3c-b956-13d8e64e513c", 
"metadata": {}, "outputs": [], "source": [ - "rt_stop_times3[\"converted_rt_arrival\"] = pd.to_datetime(\n", - " rt_stop_times3[\"service_date\"]\n", - ") + pd.to_timedelta(rt_stop_times3[\"rt_arrival_sec\"] % 86400, unit=\"s\")" + "def adjust_days_and_time(seconds, date, rt_arrival_sec):\n", + " \"\"\"\n", + " Adjusts days and time based on seconds and rt_arrival_sec.\n", + "\n", + " Parameters:\n", + " seconds (int): Number of seconds.\n", + " date (datetime): Initial date.\n", + " rt_arrival_sec (int): Arrival time in seconds.\n", + "\n", + " Returns:\n", + " datetime: Adjusted date and time.\n", + " \"\"\"\n", + " if rt_arrival_sec < (60 * 60) and (85_000 < seconds < 87_000):\n", + " # Subtract a day\n", + " return pd.Timestamp(date + pd.Timedelta(days=-1)) + pd.Timedelta(\n", + " seconds=seconds % 86400\n", + " )\n", + " elif rt_arrival_sec < (86_400 / 2) and (86_400 < seconds):\n", + " return pd.Timestamp(date) + pd.Timedelta(seconds=seconds % 86400)\n", + " else:\n", + " # No change\n", + " return pd.Timestamp(date) + pd.Timedelta(seconds=seconds)" ] }, { "cell_type": "code", "execution_count": 51, - "id": "099c6838-aa42-4eb5-a250-0ac3e054f834", + "id": "7136cbd5-d0f8-47ca-873e-ee00b6c7a207", "metadata": {}, "outputs": [], "source": [ - "timestamp_subset = [\n", - " \"converted_schd_arrival\",\n", - " \"converted_rt_arrival\",\n", - " \"scheduled_arrival_sec\",\n", - " \"rt_arrival_sec\",\n", - " \"service_date\",\n", - "]" + "more_than_86400 = rt_stop_times3.loc[\n", + " rt_stop_times3[\"scheduled_arrival_sec\"] > 86_400\n", + "].reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 52, - "id": "0ef36e93-79ed-4f86-b16a-9d28d90aea1a", + "id": "4b07d362-636f-49ed-983f-cb348660034c", "metadata": {}, "outputs": [], "source": [ - "# Rearrange: I want the stop sequence to be 1,2,3,4.\n", - "# stop ids can differ between trips of the same route and the same stop sequence is the same\n", - "rt_stop_times4 = rt_stop_times3.sort_values(\n", - " 
by=[\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"route_id\",\n", - " \"shape_array_key\",\n", - " \"direction_id\",\n", - " \"stop_sequence\",\n", - " \"rt_arrival_sec\",\n", - " ]\n", - ").reset_index(drop=True)" + "more_than_86400[\"converted_schd_arrival\"] = more_than_86400.apply(\n", + " lambda row: adjust_days_and_time(\n", + " row[\"scheduled_arrival_sec\"], row[\"service_date\"], row[\"rt_arrival_sec\"]\n", + " ),\n", + " axis=1,\n", + ")" ] }, { - "cell_type": "markdown", - "id": "8a83b6e8-fbab-4ce2-b91e-bf32890bda18", + "cell_type": "code", + "execution_count": 53, + "id": "5db197c8-e234-4427-8fdd-2a204d258040", "metadata": {}, + "outputs": [], "source": [ - "### Calculate the difference btwn actual vs scheduled arrival." + "more_than_86400[\"converted_rt_arrival\"] = pd.to_datetime(\n", + " more_than_86400[\"service_date\"]\n", + ") + pd.to_timedelta(more_than_86400[\"rt_arrival_sec\"] % 86400, unit=\"s\")" ] }, { "cell_type": "code", - "execution_count": 53, - "id": "928199b9-8459-4c3b-9744-fcd482f896f3", + "execution_count": 54, + "id": "394eb491-9f3e-4c14-88d9-963925326dba", "metadata": {}, "outputs": [], "source": [ - "rt_stop_times4['delay_min'] = (rt_stop_times4['converted_rt_arrival'] - rt_stop_times4['converted_schd_arrival']).dt.total_seconds() / 60\n" + "more_than_86400[\"delay_min\"] = (\n", + " more_than_86400[\"converted_rt_arrival\"] - more_than_86400[\"converted_schd_arrival\"]\n", + ").dt.total_seconds() / 60" ] }, { "cell_type": "code", - "execution_count": 54, - "id": "525812c8-8a9b-4a52-a2e1-4a62a47cbada", + "execution_count": 55, + "id": "0dc35f0f-2887-415a-80fd-1ef8fd4644dc", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "count 116687.00\n", - "mean 30.33\n", - "std 216.39\n", - "min -1441.97\n", - "1% -5.52\n", - "2% -3.48\n", - "5% -2.13\n", - "10% -1.25\n", - "50% 1.87\n", - "90% 9.30\n", - "95% 14.36\n", - "98% 1437.88\n", - "99% 1441.57\n", - "max 1701.60\n", + "count 2583.00\n", + 
"mean 101.87\n", + "std 365.42\n", + "min -18.18\n", + "1% -5.47\n", + "2% -3.25\n", + "5% -1.92\n", + "10% -1.20\n", + "50% 1.75\n", + "90% 9.60\n", + "95% 1440.26\n", + "98% 1445.47\n", + "99% 1448.25\n", + "max 1466.70\n", "Name: delay_min, dtype: float64\n" ] } ], "source": [ - "print(rt_stop_times4.delay_min.describe(percentiles=[0.01,0.02, 0.05, 0.1, 0.9, 0.95, 0.98,0.99]))" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "772fb613-2369-4971-984a-40680912c7dd", - "metadata": { - "scrolled": true, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
converted_schd_arrivalconverted_rt_arrivalscheduled_arrival_secrt_arrival_secservice_datescheduled_arrival_sec_copydelay_min
17882024-05-23 04:23:002024-05-22 04:27:471970-01-01 00:00:00.000102180160672024-05-22102180.00-1435.22
18942024-05-23 04:25:002024-05-22 04:28:321970-01-01 00:00:00.000102300161122024-05-22102300.00-1436.47
20002024-05-23 04:25:002024-05-22 04:30:201970-01-01 00:00:00.000102300162202024-05-22102300.00-1434.67
21062024-05-23 04:26:002024-05-22 04:31:131970-01-01 00:00:00.000102360162732024-05-22102360.00-1434.78
22042024-05-22 23:55:002024-05-22 00:00:441970-01-01 00:00:00.000086100442024-05-2286100.00-1434.27
22132024-05-23 04:27:002024-05-22 04:32:041970-01-01 00:00:00.000102420163242024-05-22102420.00-1434.93
23102024-05-22 23:56:002024-05-22 00:01:171970-01-01 00:00:00.000086160772024-05-2286160.00-1434.72
23192024-05-23 04:28:002024-05-22 04:33:221970-01-01 00:00:00.000102480164022024-05-22102480.00-1434.63
24162024-05-22 23:56:002024-05-22 00:01:351970-01-01 00:00:00.000086160952024-05-2286160.00-1434.42
24252024-05-23 04:28:002024-05-22 04:33:531970-01-01 00:00:00.000102480164332024-05-22102480.00-1434.12
\n", - "
" - ], - "text/plain": [ - " converted_schd_arrival converted_rt_arrival \\\n", - "1788 2024-05-23 04:23:00 2024-05-22 04:27:47 \n", - "1894 2024-05-23 04:25:00 2024-05-22 04:28:32 \n", - "2000 2024-05-23 04:25:00 2024-05-22 04:30:20 \n", - "2106 2024-05-23 04:26:00 2024-05-22 04:31:13 \n", - "2204 2024-05-22 23:55:00 2024-05-22 00:00:44 \n", - "2213 2024-05-23 04:27:00 2024-05-22 04:32:04 \n", - "2310 2024-05-22 23:56:00 2024-05-22 00:01:17 \n", - "2319 2024-05-23 04:28:00 2024-05-22 04:33:22 \n", - "2416 2024-05-22 23:56:00 2024-05-22 00:01:35 \n", - "2425 2024-05-23 04:28:00 2024-05-22 04:33:53 \n", - "\n", - " scheduled_arrival_sec rt_arrival_sec service_date \\\n", - "1788 1970-01-01 00:00:00.000102180 16067 2024-05-22 \n", - "1894 1970-01-01 00:00:00.000102300 16112 2024-05-22 \n", - "2000 1970-01-01 00:00:00.000102300 16220 2024-05-22 \n", - "2106 1970-01-01 00:00:00.000102360 16273 2024-05-22 \n", - "2204 1970-01-01 00:00:00.000086100 44 2024-05-22 \n", - "2213 1970-01-01 00:00:00.000102420 16324 2024-05-22 \n", - "2310 1970-01-01 00:00:00.000086160 77 2024-05-22 \n", - "2319 1970-01-01 00:00:00.000102480 16402 2024-05-22 \n", - "2416 1970-01-01 00:00:00.000086160 95 2024-05-22 \n", - "2425 1970-01-01 00:00:00.000102480 16433 2024-05-22 \n", - "\n", - " scheduled_arrival_sec_copy delay_min \n", - "1788 102180.00 -1435.22 \n", - "1894 102300.00 -1436.47 \n", - "2000 102300.00 -1434.67 \n", - "2106 102360.00 -1434.78 \n", - "2204 86100.00 -1434.27 \n", - "2213 102420.00 -1434.93 \n", - "2310 86160.00 -1434.72 \n", - "2319 102480.00 -1434.63 \n", - "2416 86160.00 -1434.42 \n", - "2425 102480.00 -1434.12 " - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rt_stop_times4.loc[rt_stop_times4.delay_min < -1433][\n", - " timestamp_subset\n", - " + [\n", - " \"scheduled_arrival_sec_copy\",\n", - " \"delay_min\",\n", - " \n", - " ]\n", - "].head(10)" + "print(\n", + " more_than_86400.delay_min.describe(\n", 
+ " percentiles=[0.01, 0.02, 0.05, 0.1, 0.9, 0.95, 0.98, 0.99]\n", + " )\n", + ")" ] }, { "cell_type": "code", "execution_count": 56, - "id": "e1504adb-e083-4642-8801-efc317fcebbf", + "id": "f397e6fd-daa9-41ba-9649-633d377088a0", "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
converted_schd_arrivalconverted_rt_arrivalscheduled_arrival_secrt_arrival_secservice_datescheduled_arrival_sec_copydelay_min
02024-05-21 00:08:002024-05-22 00:06:521970-01-01 00:00:00.0000868804122024-05-2286880.001438.87
12024-05-21 00:41:002024-05-22 00:44:221970-01-01 00:00:00.00008886026622024-05-2288860.001443.37
22024-05-21 01:11:002024-05-22 01:12:291970-01-01 00:00:00.00009066043492024-05-2290660.001441.48
32024-05-21 01:41:002024-05-22 01:39:471970-01-01 00:00:00.00009246059872024-05-2292460.001438.78
42024-05-21 02:11:002024-05-22 02:11:401970-01-01 00:00:00.00009426079002024-05-2294260.001440.67
52024-05-21 02:41:002024-05-22 02:39:141970-01-01 00:00:00.00009606095542024-05-2296060.001438.23
62024-05-21 03:11:002024-05-22 03:12:431970-01-01 00:00:00.000097860115632024-05-2297860.001441.72
72024-05-21 03:42:002024-05-22 03:41:051970-01-01 00:00:00.000099720132652024-05-2299720.001439.08
82024-05-21 04:12:002024-05-22 04:11:441970-01-01 00:00:00.000101520151042024-05-22101520.001439.73
1042024-05-21 00:09:002024-05-22 00:08:331970-01-01 00:00:00.0000869405132024-05-2286940.001439.55
\n", - "
" - ], "text/plain": [ - " converted_schd_arrival converted_rt_arrival scheduled_arrival_sec \\\n", - "0 2024-05-21 00:08:00 2024-05-22 00:06:52 1970-01-01 00:00:00.000086880 \n", - "1 2024-05-21 00:41:00 2024-05-22 00:44:22 1970-01-01 00:00:00.000088860 \n", - "2 2024-05-21 01:11:00 2024-05-22 01:12:29 1970-01-01 00:00:00.000090660 \n", - "3 2024-05-21 01:41:00 2024-05-22 01:39:47 1970-01-01 00:00:00.000092460 \n", - "4 2024-05-21 02:11:00 2024-05-22 02:11:40 1970-01-01 00:00:00.000094260 \n", - "5 2024-05-21 02:41:00 2024-05-22 02:39:14 1970-01-01 00:00:00.000096060 \n", - "6 2024-05-21 03:11:00 2024-05-22 03:12:43 1970-01-01 00:00:00.000097860 \n", - "7 2024-05-21 03:42:00 2024-05-22 03:41:05 1970-01-01 00:00:00.000099720 \n", - "8 2024-05-21 04:12:00 2024-05-22 04:11:44 1970-01-01 00:00:00.000101520 \n", - "104 2024-05-21 00:09:00 2024-05-22 00:08:33 1970-01-01 00:00:00.000086940 \n", - "\n", - " rt_arrival_sec service_date scheduled_arrival_sec_copy delay_min \n", - "0 412 2024-05-22 86880.00 1438.87 \n", - "1 2662 2024-05-22 88860.00 1443.37 \n", - "2 4349 2024-05-22 90660.00 1441.48 \n", - "3 5987 2024-05-22 92460.00 1438.78 \n", - "4 7900 2024-05-22 94260.00 1440.67 \n", - "5 9554 2024-05-22 96060.00 1438.23 \n", - "6 11563 2024-05-22 97860.00 1441.72 \n", - "7 13265 2024-05-22 99720.00 1439.08 \n", - "8 15104 2024-05-22 101520.00 1439.73 \n", - "104 513 2024-05-22 86940.00 1439.55 " + "count 2583.00\n", + "mean 7041.60\n", + "std 5789.22\n", + "min 9.00\n", + "25% 2556.50\n", + "50% 5874.00\n", + "75% 10739.00\n", + "max 86389.00\n", + "Name: rt_arrival_sec, dtype: float64" ] }, "execution_count": 56, @@ -1622,70 +1267,19 @@ } ], "source": [ - "rt_stop_times4.loc[rt_stop_times4.delay_min >1438][\n", - " timestamp_subset\n", - " + [\n", - " \"scheduled_arrival_sec_copy\",\n", - " \"delay_min\",\n", - " \n", - " ]\n", - "].head(10)" - ] - }, - { - "cell_type": "markdown", - "id": "cb7802ee-e6f1-4333-aac1-3895ab01b43e", - "metadata": {}, - "source": [ - "#### 
Observation\n", - "* There are some geniunely weird rows." + "more_than_86400.rt_arrival_sec.describe()" ] }, { "cell_type": "code", "execution_count": 57, - "id": "52d2642d-6ac1-41f0-a29c-bc2f3c93a7af", + "id": "ac956fa6-31a8-47c2-a451-ada54847aecb", "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
converted_schd_arrivalconverted_rt_arrivalscheduled_arrival_secrt_arrival_secservice_datescheduled_arrival_sec_copydelay_min
\n", - "
" - ], "text/plain": [ - "Empty DataFrame\n", - "Columns: [converted_schd_arrival, converted_rt_arrival, scheduled_arrival_sec, rt_arrival_sec, service_date, scheduled_arrival_sec_copy, delay_min]\n", - "Index: []" + "(2583, 24)" ] }, "execution_count": 57, @@ -1694,637 +1288,4132 @@ } ], "source": [ - "rt_stop_times4.loc[rt_stop_times4.delay_min >2800][\n", - " timestamp_subset\n", - " + [\n", - " \"scheduled_arrival_sec_copy\",\n", - " \"delay_min\",\n", - " \n", - " ]\n", - "]" + "more_than_86400.shape" ] }, { "cell_type": "markdown", - "id": "6aed2f18-b89c-4269-beda-9d12b6b56082", + "id": "ca60b033-1f55-45a2-ba8a-bd493d99a95f", "metadata": {}, "source": [ - "### `rt_stop_times5`: Filter out values in `delay` that are very extreme." + "#### Apply to all rows" ] }, { "cell_type": "code", "execution_count": 58, - "id": "18252b55-7099-4137-9add-9b5f77f05643", + "id": "41f6aded-7f9c-4f5e-ba7a-2a52769a3e44", "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'stop' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[58], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mstop\u001b[49m\n", - "\u001b[0;31mNameError\u001b[0m: name 'stop' is not defined" - ] - } - ], + "outputs": [], "source": [ - "stop" + "rt_stop_times3[\"converted_schd_arrival\"] = rt_stop_times3.apply(\n", + " lambda row: adjust_days_and_time(\n", + " row[\"scheduled_arrival_sec\"], row[\"service_date\"], row[\"rt_arrival_sec\"]\n", + " ),\n", + " axis=1,\n", + ")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "bbd1cbc3-9762-44b8-a564-c33e57879d10", + "execution_count": 59, + "id": "3ce579d1-0392-41a6-b535-9f4422a78216", "metadata": {}, "outputs": [], "source": [ - "# Filter to only delays that are an hour or less\n", - "rt_stop_times5 = 
rt_stop_times4[rt_stop_times4[\"delay\"] <= 3600].reset_index(drop=True)" + "rt_stop_times3[\"converted_rt_arrival\"] = pd.to_datetime(\n", + " rt_stop_times3[\"service_date\"]\n", + ") + pd.to_timedelta(rt_stop_times3[\"rt_arrival_sec\"] % 86400, unit=\"s\")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "9c19be36-0e5b-428d-bc4f-da52821208f4", + "execution_count": 60, + "id": "838af336-3592-40d8-be62-5debad7ab0c1", "metadata": {}, "outputs": [], "source": [ - "# Filter to only delays that are no less than\n", - "rt_stop_times5 = rt_stop_times5[rt_stop_times5[\"delay\"] >= -3600].reset_index(drop=True)" + "rt_stop_times3[\"delay_min\"] = (\n", + " rt_stop_times3[\"converted_rt_arrival\"] - rt_stop_times3[\"converted_schd_arrival\"]\n", + ").dt.total_seconds() / 60" ] }, { "cell_type": "code", - "execution_count": null, - "id": "813c65e5-8ee6-4896-82d7-c3e6dec8a92f", + "execution_count": 61, + "id": "984f27fd-5742-4a76-9467-4a518824f6e8", "metadata": {}, "outputs": [], "source": [ - "rt_stop_times5.shape" + "def add_day_to_23_hours(df):\n", + " df[\"converted_schd_arrival\"] = df.apply(\n", + " lambda row: row[\"converted_schd_arrival\"] + pd.Timedelta(days=1)\n", + " if row[\"delay_min\"] > (23 * 60)\n", + " else row[\"converted_schd_arrival\"],\n", + " axis=1,\n", + " )\n", + "\n", + " df[\"delay_min\"] = (\n", + " df[\"converted_rt_arrival\"] - rt_stop_times3[\"converted_schd_arrival\"]\n", + " ).dt.total_seconds() / 60\n", + " return df" ] }, { "cell_type": "code", - "execution_count": null, - "id": "f82d3e09-205e-4458-a9fa-d9e3096c366b", + "execution_count": 62, + "id": "003abe45-5d55-4839-80f8-83e693214427", "metadata": {}, "outputs": [], "source": [ - "rt_stop_times4.shape" + "percentiles = [0.01, 0.02, 0.05, 0.1, 0.9, 0.95, 0.98, 0.99]" ] }, { "cell_type": "code", - "execution_count": null, - "id": "7668b5b5-4f1a-4fc7-9fac-b3a113c2b0b2", + "execution_count": 63, + "id": "8325eed2-412b-4202-9d57-252db2fd7e26", "metadata": {}, "outputs": [], 
"source": [ - "len(rt_stop_times4) - len(rt_stop_times5)" + "rt_stop_times3 = add_day_to_23_hours(rt_stop_times3)" ] }, { - "cell_type": "markdown", - "id": "81bd753a-08ee-4d09-ac79-213e1e605405", + "cell_type": "code", + "execution_count": 64, + "id": "a16335dc-47a0-4ea1-9612-fc6d5dfea1d6", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "count 116687.00\n", + "mean 3.10\n", + "std 20.22\n", + "min -1345.67\n", + "1% -4.85\n", + "2% -3.38\n", + "5% -2.12\n", + "10% -1.25\n", + "50% 1.78\n", + "90% 8.33\n", + "95% 11.50\n", + "98% 17.12\n", + "99% 23.32\n", + "max 727.87\n", + "Name: delay_min, dtype: float64\n" + ] + } + ], "source": [ - "### Calculate the actual headway the `operator-route-direction_id-stop_sequence-stop_id-` grain\n", - "#### QUESTION: Do I need to include feed key and shape array key? What is `feed_key` and how does it differ from `schedule_gtfs_dataset_key`? Still need help" + "print(rt_stop_times3.delay_min.describe(percentiles))" ] }, { - "cell_type": "markdown", - "id": "0c9377cb-cb25-43da-9f3b-752a8107b6b7", + "cell_type": "code", + "execution_count": 65, + "id": "0daa5a88-f43f-40c9-9032-e7409125e9da", "metadata": {}, + "outputs": [], "source": [ - "### Calculate scheduled headway\n", - "* Using the same grain." 
+ "percentile_99 = rt_stop_times3[\"delay_min\"].quantile(0.99)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "b9171520-7358-4f22-9d9c-c5156e710f1b", + "execution_count": 66, + "id": "8ec23dec-abe6-4c95-9523-a4a6e7c8d1ad", "metadata": {}, "outputs": [], "source": [ - "groupby_cols = [\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"route_id\",\n", - " \"shape_array_key\",\n", - " \"direction_id\",\n", - " \"route_primary_direction\",\n", - " \"stop_sequence\",\n", - " \"stop_id\",\n", - "]" + "percentile_01 = rt_stop_times3[\"delay_min\"].quantile(0.01)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "f5b62023-ab77-4634-a526-3d822bb7a63f", + "execution_count": 67, + "id": "4a87e0b4-ef25-45ad-9ec7-268787d6bb86", "metadata": {}, "outputs": [], "source": [ - "# Subtract rt_arrival_sec from the previous row to the target row\n", - "# using groupby columns\n", - "rt_stop_times5[\"actual_headway\"] = rt_stop_times5.groupby(groupby_cols)[\n", - " \"rt_arrival_sec\"\n", - "].diff()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6328c5fb-205e-4a78-a3d3-62fafa88a4cd", - "metadata": {}, - "outputs": [], - "source": [ - "rt_stop_times5[\"schd_headway\"] = rt_stop_times5.groupby(groupby_cols)[\n", - " \"scheduled_arrival_sec\"\n", - "].diff()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1f3969a8-1850-4b74-96e7-edcaf94e25cd", - "metadata": {}, - "outputs": [], - "source": [ - "rt_stop_times5.head(10)[\n", - " [\n", - " \"scheduled_arrival_sec\",\n", - " \"rt_arrival_sec\",\n", - " \"delay\",\n", - " \"actual_headway\",\n", - " \"schd_headway\",\n", - " ]\n", - "]" - ] - }, - { - "cell_type": "markdown", - "id": "8a5f6f72-d463-4076-80ce-d22ab1f718b1", - "metadata": {}, - "source": [ - "### Fill in `nans` with 0 \n", - "* I am not sure if `nans` impact calculations of the mean scheduled headway and whatnot?\n", - "* These `nans` are because the first `operator-route-stop_id-stop_sequence` 
combo won't have anything to compare it to.\n", - "* Katrina: I would fill in the actual/schedule headway columns with 0 rather than dropping the first row in each grouping. I wonder if it makes sense to use a more descriptive column name than headway, such as \"minutes since last vehicle\"" + "percentile_01_df = rt_stop_times3.loc[rt_stop_times3.delay_min < percentile_01]" ] }, { "cell_type": "code", - "execution_count": null, - "id": "6603e96d-085d-460f-965f-ba01a10f6dbc", - "metadata": {}, - "outputs": [], - "source": [ - "rt_stop_times5 = rt_stop_times5.fillna(0)" - ] - }, - { - "cell_type": "markdown", - "id": "28362518-a54b-4f5d-a4d7-24a3d8ddefd0", + "execution_count": 68, + "id": "8cb4f43f-7b7a-49f3-b3e6-5162df5a99db", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1165" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "### Transit Matters Method\n", - "* To Do: add back in route & operator information" + "len(percentile_01_df)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "f0f0f4b7-fa64-4b01-a141-5dd78c59693b", + "execution_count": 69, + "id": "1bab6d68-6cb3-44b5-9f08-6e9d82cc9aba", "metadata": {}, "outputs": [], "source": [ - "transit_matters_df1 = rt_stop_times5.copy()" + "delay_above_99 = rt_stop_times3.loc[rt_stop_times3.delay_min > percentile_99]" ] }, { "cell_type": "code", - "execution_count": null, - "id": "7bea63e5-45d0-4d06-8c1c-fd34a69ffde7", + "execution_count": 70, + "id": "0bbbd0cf-2e53-46da-ba52-0ba0a8614fc9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 1167.00\n", + "mean 83.06\n", + "std 133.17\n", + "min 23.33\n", + "1% 23.44\n", + "2% 23.59\n", + "5% 23.92\n", + "10% 24.68\n", + "50% 30.17\n", + "90% 274.95\n", + "95% 450.83\n", + "98% 460.65\n", + "99% 616.57\n", + "max 727.87\n", + "Name: delay_min, dtype: float64" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": 
"execute_result" + } + ], "source": [ - "transit_matters_df1[\"pct_actual_schd_headway\"] = (\n", - " transit_matters_df1.actual_headway / transit_matters_df1.schd_headway\n", - ")" + "delay_above_99.delay_min.describe(percentiles)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "c67c6299-68f0-414f-a9c1-e0b27511b9e5", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "transit_matters_df1[\"bunched_y_n\"] = np.where(\n", - " transit_matters_df1[\"pct_actual_schd_headway\"] < 0.25, \"bunched\", \"not bunched\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "b3fdb4cb-fb86-4b76-85f1-7451a4ef7dbe", + "execution_count": 71, + "id": "5a991b19-685a-4e8a-89dc-748fc5d9941b", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1167" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#### There are some very extreme values: how to deal with this?\n" + "len(delay_above_99)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "70bc7bfe-2306-498a-b0d7-e012d266d5fc", + "execution_count": 72, + "id": "dfc4d1fe-5720-4ba0-8cc4-29dd96175287", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 1165.00\n", + "mean -41.63\n", + "std 114.86\n", + "min -1345.67\n", + "1% -593.61\n", + "2% -393.00\n", + "5% -183.12\n", + "10% -52.63\n", + "50% -10.10\n", + "90% -5.22\n", + "95% -5.03\n", + "98% -4.92\n", + "99% -4.90\n", + "max -4.87\n", + "Name: delay_min, dtype: float64" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "transit_matters_df1.pct_actual_schd_headway.describe()" + "percentile_01_df.delay_min.describe(percentiles)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "c3b62e33-313a-4f97-a1f5-37101abe7db8", + "execution_count": 73, + "id": "332bdda3-d690-441c-a299-c9c5f8be7709", "metadata": {}, "outputs": [], "source": [ - 
"len(transit_matters_df1.loc[transit_matters_df1.pct_actual_schd_headway < 0])" + "percentile_10 = percentile_01_df[\"delay_min\"].quantile(0.10)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "d7d56ce7-66e9-4084-a725-a9eff7c4c5b2", + "execution_count": 74, + "id": "2bc0f183-294a-4617-bc17-aa73a570f6e0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "-52.626666666666665" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "transit_matters_df1.bunched_y_n.value_counts() / len(transit_matters_df1)" + "percentile_10" ] }, { "cell_type": "code", - "execution_count": null, - "id": "ee874ad7-e2c5-4a4a-8e79-ac79d9d49f30", + "execution_count": 75, + "id": "4440bc59-2335-4ef6-abbc-55d334e4bf7d", "metadata": {}, "outputs": [], "source": [ - "sf_38r_test = transit_matters_df1.loc[\n", - " (transit_matters_df1.stop_id == \"14295\")\n", - " & (\n", - " transit_matters_df1.schedule_gtfs_dataset_key\n", - " == \"7cc0cb1871dfd558f11a2885c145d144\"\n", - " )\n", - " & (transit_matters_df1.stop_sequence == 11)\n", - " & (transit_matters_df1.route_id == \"38R\")\n", - "]" + "percentile_90 = delay_above_99[\"delay_min\"].quantile(0.90)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "099f0701-ed36-46fe-9479-88021d9f3bdb", + "execution_count": 76, + "id": "5d231749-31b5-4899-b36b-4ea5fb55981c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "274.9466666666675" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Row 444797: scheduled to arrive at 69480, but actually arrives 69890" + "percentile_90" ] }, { "cell_type": "markdown", - "id": "869ea449-b2b2-40a9-96fc-dd963b9f21ed", + "id": "9820f995-060e-4c8d-adf8-1d50adf4400c", "metadata": {}, "source": [ - "#### QUESTION: Wonder if I should convert time stamps to hours so it's at least in military time instead of 
seconds? Although I'm not really sure if this is sound." + "* If scheduled_arrival_sec is in the 86000 ballpark and rt_arrival_sec is less than 60*60*3, then subtract a day " ] }, { - "cell_type": "code", - "execution_count": null, - "id": "aef09625-a050-463b-8a60-f15dc7839520", + "cell_type": "markdown", + "id": "6aed2f18-b89c-4269-beda-9d12b6b56082", "metadata": {}, - "outputs": [], "source": [ - "sf_38r_test[\"sched_arrival_min\"] = sf_38r_test.scheduled_arrival_sec / 60" + "### Filter out values in `delay_min` that are very extreme." ] }, { "cell_type": "code", - "execution_count": null, - "id": "6658bb0f-0b3f-4dc2-914c-b144f785d608", + "execution_count": 77, + "id": "bbd1cbc3-9762-44b8-a564-c33e57879d10", "metadata": {}, "outputs": [], "source": [ - "sf_38r_test[\"rt_arrival_min\"] = sf_38r_test.rt_arrival_sec / 60" + "# Keep only delays between percentile_10 (10th percentile of the rows below the 1st percentile of delay_min) and percentile_90 (90th percentile of the rows above the 99th percentile)\n", + "rt_stop_times4 = rt_stop_times3[\n", + " (rt_stop_times3[\"delay_min\"] >= percentile_10)\n", + " & (rt_stop_times3[\"delay_min\"] <= percentile_90)\n", + "].reset_index(drop=True)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "9bba60cc-c879-441d-9f50-b0e6ac7a5b90", + "execution_count": 78, + "id": "7668b5b5-4f1a-4fc7-9fac-b3a113c2b0b2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "-234" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "sf_38r_test[\"actual_headway_min\"] = sf_38r_test.actual_headway / 60" + "len(rt_stop_times4) - len(rt_stop_times3)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "20963551-f43d-475f-a307-6b30234b73a8", + "execution_count": 79, + "id": "da609102-30a4-4de3-b519-297822c450e6", "metadata": {}, - "outputs": [], - "source": [ - "sf_38r_test[\"schd_headway_min\"] = sf_38r_test.schd_headway / 60" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dc43f71d-8f7b-4536-9d15-14a11703fa3d", - "metadata": { - "tags": [] -
}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 116453.00\n", + "mean 2.94\n", + "std 6.38\n", + "min -52.42\n", + "1% -4.59\n", + "2% -3.30\n", + "5% -2.10\n", + "10% -1.25\n", + "50% 1.78\n", + "90% 8.30\n", + "95% 11.40\n", + "98% 16.77\n", + "99% 21.97\n", + "max 271.13\n", + "Name: delay_min, dtype: float64" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "sf_38r_test[\n", - " [\n", - " \"sched_arrival_min\",\n", - " \"rt_arrival_min\",\n", - " \"actual_headway_min\",\n", - " \"schd_headway_min\",\n", - " \"pct_actual_schd_headway\",\n", - " ]\n", - "].tail(5)" + "rt_stop_times4.delay_min.describe(percentiles)" ] }, { "cell_type": "markdown", - "id": "3bbf49be-6ec9-41a7-b14e-a74b5fff49d6", + "id": "81bd753a-08ee-4d09-ac79-213e1e605405", "metadata": {}, "source": [ - "* Row 466475 was scheduled to arrive after row 466476" + "### Calculate the actual & scheduled headway at the `operator-route-direction_id-stop_sequence-stop_id` grain" ] }, { - "cell_type": "markdown", - "id": "db10254b-d5d4-4619-9c6e-10fab19ec6b2", + "cell_type": "code", + "execution_count": 80, + "id": "b9171520-7358-4f22-9d9c-c5156e710f1b", "metadata": {}, + "outputs": [], "source": [ - "#### Groupby grain and see how many trips for that grain are considered \"bunched\" or not."
+ "groupby_cols = [\n", + " \"schedule_gtfs_dataset_key\",\n", + " \"route_id\",\n", + " \"shape_array_key\",\n", + " \"direction_id\",\n", + " \"route_primary_direction\",\n", + " \"stop_sequence\",\n", + " \"stop_id\",\n", + "]" ] }, { "cell_type": "code", - "execution_count": null, - "id": "e72f4e50-da82-4a59-8aae-fc6b79f91cc5", + "execution_count": 81, + "id": "f7d3055e-c079-4232-9123-2f9b2d3e07c8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['trip_id', 'stop_id', 'stop_sequence', 'scheduled_arrival_sec',\n", + " 'schedule_gtfs_dataset_key', 'trip_instance_key', 'rt_arrival_sec',\n", + " 'route_id', 'shape_array_key', 'feed_key', 'route_long_name',\n", + " 'direction_id', 'route_type', 'route_primary_direction',\n", + " 'med_headway_minutes', 'organization_name', 'name', 'caltrans_district',\n", + " 'service_date', 'route_type_str', 'scheduled_arrival_sec_copy',\n", + " 'converted_schd_arrival', 'converted_rt_arrival', 'delay_min'],\n", + " dtype='object')" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "transit_matters_df2 = (\n", - " transit_matters_df1.groupby(\n", - " [\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"route_long_name\",\n", - " \"shape_array_key\",\n", - " \"route_id\",\n", - " \"stop_id\",\n", - " \"direction_id\",\n", - " \"route_primary_direction\",\n", - " \"bunched_y_n\",\n", - " ]\n", - " )\n", - " .agg({\"trip_instance_key\": \"nunique\"})\n", - " .reset_index()\n", - ")" + "rt_stop_times4.columns" ] }, { "cell_type": "code", - "execution_count": null, - "id": "ebf0e403-89c5-4b2c-b962-44affe3c0b42", + "execution_count": 82, + "id": "f5b62023-ab77-4634-a526-3d822bb7a63f", "metadata": {}, "outputs": [], "source": [ - "# Filter out only rows that are bunched.\n", - "bunched_only = transit_matters_df2.loc[\n", - " transit_matters_df2.bunched_y_n == \"bunched\"\n", - "].reset_index(drop=True)" + "# Subtract rt_arrival_sec from the 
previous row to the target row\n", + "# using groupby columns\n", + "rt_stop_times4[\"actual_arrival_lag\"] = rt_stop_times4.groupby(groupby_cols)[\n", + " \"converted_rt_arrival\"\n", + "].diff()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "7591762b-abab-4083-8b42-53dca0523fca", + "execution_count": 83, + "id": "6328c5fb-205e-4a78-a3d3-62fafa88a4cd", "metadata": {}, "outputs": [], "source": [ - "bunched_only = bunched_only.rename(columns={\"trip_instance_key\": \"bunched_trips\"})" + "rt_stop_times4[\"scheduled_arrival_lag\"] = rt_stop_times4.groupby(groupby_cols)[\n", + " \"converted_schd_arrival\"\n", + "].diff()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "6f94980e-b9c4-4c1e-a497-ea4b3f88c55b", + "execution_count": 84, + "id": "1f3969a8-1850-4b74-96e7-edcaf94e25cd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
converted_rt_arrivalactual_arrival_lagconverted_schd_arrivalscheduled_arrival_lag
02024-05-22 00:06:52NaT2024-05-22 00:08:00NaT
12024-05-22 00:44:220 days 00:37:302024-05-22 00:41:000 days 00:33:00
22024-05-22 01:12:290 days 00:28:072024-05-22 01:11:000 days 00:30:00
32024-05-22 01:39:470 days 00:27:182024-05-22 01:41:000 days 00:30:00
42024-05-22 02:11:400 days 00:31:532024-05-22 02:11:000 days 00:30:00
52024-05-22 02:39:140 days 00:27:342024-05-22 02:41:000 days 00:30:00
62024-05-22 03:12:430 days 00:33:292024-05-22 03:11:000 days 00:30:00
72024-05-22 03:41:050 days 00:28:222024-05-22 03:42:000 days 00:31:00
82024-05-22 04:11:440 days 00:30:392024-05-22 04:12:000 days 00:30:00
92024-05-22 04:45:070 days 00:33:232024-05-22 04:49:000 days 00:37:00
\n", + "
" + ], + "text/plain": [ + " converted_rt_arrival actual_arrival_lag converted_schd_arrival \\\n", + "0 2024-05-22 00:06:52 NaT 2024-05-22 00:08:00 \n", + "1 2024-05-22 00:44:22 0 days 00:37:30 2024-05-22 00:41:00 \n", + "2 2024-05-22 01:12:29 0 days 00:28:07 2024-05-22 01:11:00 \n", + "3 2024-05-22 01:39:47 0 days 00:27:18 2024-05-22 01:41:00 \n", + "4 2024-05-22 02:11:40 0 days 00:31:53 2024-05-22 02:11:00 \n", + "5 2024-05-22 02:39:14 0 days 00:27:34 2024-05-22 02:41:00 \n", + "6 2024-05-22 03:12:43 0 days 00:33:29 2024-05-22 03:11:00 \n", + "7 2024-05-22 03:41:05 0 days 00:28:22 2024-05-22 03:42:00 \n", + "8 2024-05-22 04:11:44 0 days 00:30:39 2024-05-22 04:12:00 \n", + "9 2024-05-22 04:45:07 0 days 00:33:23 2024-05-22 04:49:00 \n", + "\n", + " scheduled_arrival_lag \n", + "0 NaT \n", + "1 0 days 00:33:00 \n", + "2 0 days 00:30:00 \n", + "3 0 days 00:30:00 \n", + "4 0 days 00:30:00 \n", + "5 0 days 00:30:00 \n", + "6 0 days 00:30:00 \n", + "7 0 days 00:31:00 \n", + "8 0 days 00:30:00 \n", + "9 0 days 00:37:00 " + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "transit_matters_agg = [\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"route_long_name\",\n", - " \"shape_array_key\",\n", - " \"route_id\",\n", - " \"stop_id\",\n", - " \"direction_id\",\n", - " \"route_primary_direction\",\n", + "rt_stop_times4.head(10)[\n", + " [\n", + " \"converted_rt_arrival\",\n", + " \"actual_arrival_lag\",\n", + " \"converted_schd_arrival\",\n", + " \"scheduled_arrival_lag\",\n", + " ]\n", "]" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "dcca9769-aa67-48d5-9b06-acc86cade877", + "cell_type": "markdown", + "id": "8a5f6f72-d463-4076-80ce-d22ab1f718b1", "metadata": {}, - "outputs": [], "source": [ - "# Aggregate all trips on the grain\n", - "transit_matters_all_trips = (\n", - " transit_matters_df1.groupby(transit_matters_agg)\n", - " .agg({\"trip_instance_key\": \"nunique\"})\n", - " .reset_index()\n", 
- " .rename(columns={\"trip_instance_key\": \"all_trips\"})\n", - ")" + "### Fill in `nans` with 0 \n", + "* I am not sure if `nans` impact calculations of the mean scheduled headway and whatnot?\n", + "* These `nans` are because the first `operator-route-stop_id-stop_sequence` combo won't have anything to compare it to.\n", + "* Katrina: I would fill in the actual/schedule headway columns with 0 rather than dropping the first row in each grouping. I wonder if it makes sense to use a more descriptive column name than headway, such as \"minutes since last vehicle\"" ] }, { "cell_type": "code", - "execution_count": null, - "id": "2f57e136-fc83-4eac-bc05-84f530e2f4b0", + "execution_count": 85, + "id": "6603e96d-085d-460f-965f-ba01a10f6dbc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 116453 entries, 0 to 116452\n", + "Data columns (total 26 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 trip_id 116453 non-null object \n", + " 1 stop_id 116453 non-null object \n", + " 2 stop_sequence 116453 non-null int64 \n", + " 3 scheduled_arrival_sec 116453 non-null float64 \n", + " 4 schedule_gtfs_dataset_key 116453 non-null object \n", + " 5 trip_instance_key 116453 non-null object \n", + " 6 rt_arrival_sec 116453 non-null int64 \n", + " 7 route_id 116453 non-null object \n", + " 8 shape_array_key 116453 non-null object \n", + " 9 feed_key 116453 non-null object \n", + " 10 route_long_name 116453 non-null object \n", + " 11 direction_id 116453 non-null float64 \n", + " 12 route_type 116453 non-null object \n", + " 13 route_primary_direction 116453 non-null object \n", + " 14 med_headway_minutes 116453 non-null float64 \n", + " 15 organization_name 116453 non-null object \n", + " 16 name 116453 non-null object \n", + " 17 caltrans_district 116453 non-null object \n", + " 18 service_date 116453 non-null datetime64[ns] \n", + " 19 
route_type_str 116453 non-null object \n", + " 20 scheduled_arrival_sec_copy 116453 non-null float64 \n", + " 21 converted_schd_arrival 116453 non-null datetime64[ns] \n", + " 22 converted_rt_arrival 116453 non-null datetime64[ns] \n", + " 23 delay_min 116453 non-null float64 \n", + " 24 actual_arrival_lag 110343 non-null timedelta64[ns]\n", + " 25 scheduled_arrival_lag 110343 non-null timedelta64[ns]\n", + "dtypes: datetime64[ns](3), float64(5), int64(2), object(14), timedelta64[ns](2)\n", + "memory usage: 23.1+ MB\n" + ] + } + ], "source": [ - "# Merge back, using left merge to keep bunching\n", - "bunched_only = pd.merge(\n", - " bunched_only, transit_matters_all_trips, on=transit_matters_agg, how=\"left\"\n", - ")" + "rt_stop_times4.info()" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "55b8feb0-1e0a-4717-b966-ee360becfde5", + "cell_type": "markdown", + "id": "28362518-a54b-4f5d-a4d7-24a3d8ddefd0", "metadata": {}, - "outputs": [], "source": [ - "bunched_only[\"pct_trips_bunched\"] = (\n", - " bunched_only.bunched_trips / bunched_only.all_trips * 100\n", - ")" + "### Transit Matters Method" ] }, { "cell_type": "code", - "execution_count": null, - "id": "3674d237-8b1c-4943-afc8-45b85b4b13d7", + "execution_count": 86, + "id": "f0f0f4b7-fa64-4b01-a141-5dd78c59693b", "metadata": {}, "outputs": [], "source": [ - "bunched_only = bunched_only.drop(columns=[\"all_trips\"])" + "transit_matters_df1 = rt_stop_times4.copy()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "7213e2ad-8485-4e0a-b8b8-0aaf6dbae3ae", + "execution_count": 87, + "id": "7bea63e5-45d0-4d06-8c1c-fd34a69ffde7", "metadata": {}, "outputs": [], "source": [ - "# Merge back all rows that don't have bunching trips.\n", - "transit_matters_m1 = pd.merge(\n", - " transit_matters_all_trips,\n", - " bunched_only,\n", - " on=transit_matters_agg,\n", - " how=\"left\",\n", + "transit_matters_df1[\"pct_actual_schd_headway\"] = (\n", + " transit_matters_df1.actual_arrival_lag / 
transit_matters_df1.scheduled_arrival_lag\n", ")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "f5253b6a-8cad-469f-a306-6fa712e9799a", + "execution_count": 88, + "id": "c67c6299-68f0-414f-a9c1-e0b27511b9e5", "metadata": {}, "outputs": [], "source": [ - "transit_matters_m1 = transit_matters_m1.drop(\n", - " columns=[\n", - " \"bunched_y_n\",\n", - " ]\n", + "import numpy as np\n", + "\n", + "transit_matters_df1[\"bunched_y_n\"] = np.where(\n", + " transit_matters_df1[\"pct_actual_schd_headway\"] < 0.25, \"bunched\", \"not bunched\"\n", ")" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "4a25225a-7883-457f-a27a-f606164cdd37", + "cell_type": "markdown", + "id": "b3fdb4cb-fb86-4b76-85f1-7451a4ef7dbe", "metadata": {}, - "outputs": [], "source": [ - "transit_matters_m1.pct_trips_bunched = transit_matters_m1.pct_trips_bunched.fillna(0)" + "#### There are some very extreme values: how to deal with this?\n" ] }, { "cell_type": "code", - "execution_count": null, - "id": "2e2abda3-b3de-4f03-baf4-7f764dd10255", + "execution_count": 89, + "id": "70bc7bfe-2306-498a-b0d7-e012d266d5fc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 110343.00\n", + "mean 0.99\n", + "std 0.28\n", + "min -1.45\n", + "25% 0.90\n", + "50% 1.00\n", + "75% 1.10\n", + "max 3.24\n", + "Name: pct_actual_schd_headway, dtype: float64" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "transit_matters_m1.pct_trips_bunched.describe()" + "transit_matters_df1.pct_actual_schd_headway.describe()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "6f4c4386-acfb-483a-947d-0279710df61b", + "execution_count": 90, + "id": "d7d56ce7-66e9-4084-a725-a9eff7c4c5b2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "not bunched 0.98\n", + "bunched 0.02\n", + "Name: bunched_y_n, dtype: float64" + ] + }, + "execution_count": 90, + "metadata": 
{}, + "output_type": "execute_result" + } + ], "source": [ - "transit_matters_m1.loc[transit_matters_m1.pct_trips_bunched >= 10].shape" + "transit_matters_df1.bunched_y_n.value_counts() / len(transit_matters_df1)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "4284da28-0244-4291-aca9-4ffd752fdfb4", + "execution_count": 91, + "id": "ee874ad7-e2c5-4a4a-8e79-ac79d9d49f30", "metadata": {}, "outputs": [], "source": [ - "transit_matters_m1.loc[\n", - " (transit_matters_m1.schedule_gtfs_dataset_key == \"7cc0cb1871dfd558f11a2885c145d144\")\n", - " & (transit_matters_m1.shape_array_key == \"955e2fc8f9f8a4be2c67c7212be874f6\")\n", - " & (transit_matters_m1.route_id == \"1\")\n", - " & (transit_matters_m1.direction_id == 1)\n", - " & (transit_matters_m1.stop_id == \"13853\")\n", + "example1 = transit_matters_df1.loc[\n", + " (transit_matters_df1.stop_id == \"5637\")\n", + " & (\n", + " transit_matters_df1.schedule_gtfs_dataset_key\n", + " == \"0666caf3ec1ecc96b74f4477ee4bc939\"\n", + " )\n", + " & (transit_matters_df1.stop_sequence == 32)\n", + " & (transit_matters_df1.route_id == \"204-13172\")\n", "]" ] }, { "cell_type": "code", - "execution_count": null, - "id": "1388465f-62b5-42aa-8db4-e5a0963187b3", + "execution_count": 92, + "id": "aef09625-a050-463b-8a60-f15dc7839520", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_2599/3773661829.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " example1[\"sched_arrival_min\"] = example1.scheduled_arrival_sec / 60\n" + ] + } + ], "source": [ - "bunched_only.loc[\n", - " (bunched_only.schedule_gtfs_dataset_key == \"7cc0cb1871dfd558f11a2885c145d144\")\n", - " & 
(bunched_only.shape_array_key == \"955e2fc8f9f8a4be2c67c7212be874f6\")\n", - " & (bunched_only.route_id == \"1\")\n", - " & (bunched_only.direction_id == 1)\n", - " & (bunched_only.stop_id == \"13853\")\n", - "]" + "example1[\"sched_arrival_min\"] = example1.scheduled_arrival_sec / 60" ] }, { - "cell_type": "markdown", - "id": "b0579e78-2a95-4d8b-9761-2824aa39a8eb", + "cell_type": "code", + "execution_count": 93, + "id": "6658bb0f-0b3f-4dc2-914c-b144f785d608", "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_2599/579629931.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " example1[\"rt_arrival_min\"] = example1.rt_arrival_sec / 60\n" + ] + } + ], "source": [ - "### Use 2 minute benchmark\n", - "* [Source](https://static1.squarespace.com/static/533b9a24e4b01d79d0ae4376/t/645e82de1f570b31497c44dc/1683915486889/TransitMatters-Headwaymanagement.pdf)\n", - "* Justifying the use of\n", - "headway maintenance. For example, in April\n", - "2022 the 66 bus significantly bunched around\n", - "several stops. When bunching is defined as\n", - "buses that run within two minutes or less of\n", - "each other, inbound buses towards Nubian\n", - "Square bunched 10% of the time at Brigham\n", - "Circle, 9% at Brookline Village and Roxbury\n", - "Crossing, and 8% of the time at Coolidge\n", - "Corner. Bunching is even more dramatic\n", - "outbound towards Harvard Square where\n", - "buses bunched over 35% of the time at Winship\n", - "St, 13% at Coolidge Corner and Harvard Ave at\n", - "Commonwealth Ave, and 12% at North Harvard\n", - "St at Western Ave. 
View more data about bus\n", - "bunching through the TransitMatters Data\n", - "Dashboard here.\n", - "\n", - "* To Do: add back in route & operator information" + "example1[\"rt_arrival_min\"] = example1.rt_arrival_sec / 60" ] }, { "cell_type": "code", - "execution_count": null, - "id": "e0706e7e-0d56-43b2-bf3c-4205e9277c64", + "execution_count": 94, + "id": "2a301a59-7357-43a0-8f5c-3ba31b889878", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(106, 30)" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "example1.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "id": "dc43f71d-8f7b-4536-9d15-14a11703fa3d", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
converted_rt_arrivalactual_arrival_lagconverted_schd_arrivalscheduled_arrival_lagpct_actual_schd_headwaybunched_y_n
31582024-05-22 00:07:06NaT2024-05-22 00:02:00NaTNaNnot bunched
31592024-05-22 00:34:010 days 00:26:552024-05-22 00:32:000 days 00:30:000.90not bunched
31602024-05-22 01:14:460 days 00:40:452024-05-22 01:04:000 days 00:32:001.27not bunched
31612024-05-22 01:35:450 days 00:20:592024-05-22 01:34:000 days 00:30:000.70not bunched
31622024-05-22 02:04:520 days 00:29:072024-05-22 02:04:000 days 00:30:000.97not bunched
31632024-05-22 02:34:570 days 00:30:052024-05-22 02:34:000 days 00:30:001.00not bunched
31642024-05-22 03:03:090 days 00:28:122024-05-22 03:04:000 days 00:30:000.94not bunched
31652024-05-22 03:35:530 days 00:32:442024-05-22 03:34:000 days 00:30:001.09not bunched
31662024-05-22 04:04:360 days 00:28:432024-05-22 04:04:000 days 00:30:000.96not bunched
31672024-05-22 04:39:500 days 00:35:142024-05-22 04:34:000 days 00:30:001.17not bunched
31682024-05-22 05:02:080 days 00:22:182024-05-22 05:02:000 days 00:28:000.80not bunched
31692024-05-22 05:17:540 days 00:15:462024-05-22 05:17:000 days 00:15:001.05not bunched
31702024-05-22 05:28:200 days 00:10:262024-05-22 05:29:000 days 00:12:000.87not bunched
31712024-05-22 05:38:080 days 00:09:482024-05-22 05:39:000 days 00:10:000.98not bunched
31722024-05-22 05:52:070 days 00:13:592024-05-22 05:48:000 days 00:09:001.55not bunched
31732024-05-22 05:57:540 days 00:05:472024-05-22 05:59:000 days 00:11:000.53not bunched
31742024-05-22 06:10:570 days 00:13:032024-05-22 06:09:000 days 00:10:001.30not bunched
31752024-05-22 06:21:060 days 00:10:092024-05-22 06:19:000 days 00:10:001.01not bunched
31762024-05-22 06:28:440 days 00:07:382024-05-22 06:29:000 days 00:10:000.76not bunched
31772024-05-22 06:31:180 days 00:02:342024-05-22 06:48:000 days 00:19:000.14bunched
31782024-05-22 06:39:240 days 00:08:062024-05-22 06:39:00-1 days +23:51:00-0.90bunched
31792024-05-22 07:00:580 days 00:21:342024-05-22 06:57:000 days 00:18:001.20not bunched
31802024-05-22 07:04:290 days 00:03:312024-05-22 07:06:000 days 00:09:000.39not bunched
31812024-05-22 07:19:540 days 00:15:252024-05-22 07:15:000 days 00:09:001.71not bunched
31822024-05-22 07:22:200 days 00:02:262024-05-22 07:25:000 days 00:10:000.24bunched
31832024-05-22 07:42:160 days 00:19:562024-05-22 07:35:000 days 00:10:001.99not bunched
31842024-05-22 07:46:300 days 00:04:142024-05-22 07:45:000 days 00:10:000.42not bunched
31852024-05-22 07:55:580 days 00:09:282024-05-22 07:56:000 days 00:11:000.86not bunched
31862024-05-22 08:14:020 days 00:18:042024-05-22 08:05:000 days 00:09:002.01not bunched
31872024-05-22 08:16:180 days 00:02:162024-05-22 08:15:000 days 00:10:000.23bunched
31882024-05-22 08:31:470 days 00:15:292024-05-22 08:25:000 days 00:10:001.55not bunched
31892024-05-22 08:34:420 days 00:02:552024-05-22 08:35:000 days 00:10:000.29not bunched
31902024-05-22 08:49:270 days 00:14:452024-05-22 08:45:000 days 00:10:001.48not bunched
31912024-05-22 08:55:560 days 00:06:292024-05-22 08:56:000 days 00:11:000.59not bunched
31922024-05-22 09:08:250 days 00:12:292024-05-22 09:06:000 days 00:10:001.25not bunched
31932024-05-22 09:18:180 days 00:09:532024-05-22 09:16:000 days 00:10:000.99not bunched
31942024-05-22 09:24:330 days 00:06:152024-05-22 09:26:000 days 00:10:000.62not bunched
31952024-05-22 09:34:380 days 00:10:052024-05-22 09:36:000 days 00:10:001.01not bunched
31962024-05-22 09:47:130 days 00:12:352024-05-22 09:46:000 days 00:10:001.26not bunched
31972024-05-22 09:54:440 days 00:07:312024-05-22 09:56:000 days 00:10:000.75not bunched
31982024-05-22 10:10:500 days 00:16:062024-05-22 10:06:000 days 00:10:001.61not bunched
31992024-05-22 10:14:460 days 00:03:562024-05-22 10:16:000 days 00:10:000.39not bunched
32002024-05-22 10:25:440 days 00:10:582024-05-22 10:26:000 days 00:10:001.10not bunched
32012024-05-22 10:41:120 days 00:15:282024-05-22 10:36:000 days 00:10:001.55not bunched
32022024-05-22 10:44:540 days 00:03:422024-05-22 10:45:000 days 00:09:000.41not bunched
32032024-05-22 10:54:150 days 00:09:212024-05-22 10:55:000 days 00:10:000.94not bunched
32042024-05-22 11:04:510 days 00:10:362024-05-22 11:05:000 days 00:10:001.06not bunched
32052024-05-22 11:12:430 days 00:07:522024-05-22 11:14:000 days 00:09:000.87not bunched
32062024-05-22 11:24:260 days 00:11:432024-05-22 11:24:000 days 00:10:001.17not bunched
32072024-05-22 11:42:040 days 00:17:382024-05-22 11:44:000 days 00:20:000.88not bunched
32082024-05-22 11:57:130 days 00:15:092024-05-22 11:54:000 days 00:10:001.51not bunched
32092024-05-22 12:01:540 days 00:04:412024-05-22 12:04:000 days 00:10:000.47not bunched
32102024-05-22 12:15:130 days 00:13:192024-05-22 12:14:000 days 00:10:001.33not bunched
32112024-05-22 12:30:300 days 00:15:172024-05-22 12:24:000 days 00:10:001.53not bunched
32122024-05-22 12:31:420 days 00:01:122024-05-22 12:33:000 days 00:09:000.13bunched
32132024-05-22 12:41:510 days 00:10:092024-05-22 12:43:000 days 00:10:001.01not bunched
32142024-05-22 12:52:370 days 00:10:462024-05-22 12:52:000 days 00:09:001.20not bunched
32152024-05-22 13:04:340 days 00:11:572024-05-22 13:02:000 days 00:10:001.20not bunched
32162024-05-22 13:10:380 days 00:06:042024-05-22 13:12:000 days 00:10:000.61not bunched
32172024-05-22 13:22:410 days 00:12:032024-05-22 13:22:000 days 00:10:001.21not bunched
32182024-05-22 13:34:520 days 00:12:112024-05-22 13:32:000 days 00:10:001.22not bunched
32192024-05-22 13:40:170 days 00:05:252024-05-22 13:42:000 days 00:10:000.54not bunched
32202024-05-22 13:54:380 days 00:14:212024-05-22 13:52:000 days 00:10:001.44not bunched
32212024-05-22 14:03:040 days 00:08:262024-05-22 14:02:000 days 00:10:000.84not bunched
32222024-05-22 14:13:270 days 00:10:232024-05-22 14:12:000 days 00:10:001.04not bunched
32232024-05-22 14:25:540 days 00:12:272024-05-22 14:22:000 days 00:10:001.25not bunched
32242024-05-22 14:34:260 days 00:08:322024-05-22 14:33:000 days 00:11:000.78not bunched
32252024-05-22 14:43:280 days 00:09:022024-05-22 14:43:000 days 00:10:000.90not bunched
32262024-05-22 14:53:010 days 00:09:332024-05-22 14:52:000 days 00:09:001.06not bunched
32272024-05-22 15:05:580 days 00:12:572024-05-22 15:02:000 days 00:10:001.29not bunched
32282024-05-22 15:12:290 days 00:06:312024-05-22 15:12:000 days 00:10:000.65not bunched
32292024-05-22 15:24:450 days 00:12:162024-05-22 15:22:000 days 00:10:001.23not bunched
32302024-05-22 15:34:360 days 00:09:512024-05-22 15:32:000 days 00:10:000.98not bunched
32312024-05-22 15:43:560 days 00:09:202024-05-22 15:42:000 days 00:10:000.93not bunched
32322024-05-22 16:00:230 days 00:16:272024-05-22 15:52:000 days 00:10:001.65not bunched
32332024-05-22 16:07:490 days 00:07:262024-05-22 16:02:000 days 00:10:000.74not bunched
32342024-05-22 16:16:510 days 00:09:022024-05-22 16:12:000 days 00:10:000.90not bunched
32352024-05-22 16:24:110 days 00:07:202024-05-22 16:22:000 days 00:10:000.73not bunched
32362024-05-22 16:38:580 days 00:14:472024-05-22 16:32:000 days 00:10:001.48not bunched
32372024-05-22 16:43:420 days 00:04:442024-05-22 16:42:000 days 00:10:000.47not bunched
32382024-05-22 16:52:410 days 00:08:592024-05-22 16:52:000 days 00:10:000.90not bunched
32392024-05-22 17:07:310 days 00:14:502024-05-22 17:03:000 days 00:11:001.35not bunched
32402024-05-22 17:13:330 days 00:06:022024-05-22 17:13:000 days 00:10:000.60not bunched
32412024-05-22 17:27:070 days 00:13:342024-05-22 17:23:000 days 00:10:001.36not bunched
32422024-05-22 17:42:070 days 00:15:002024-05-22 17:33:000 days 00:10:001.50not bunched
32432024-05-22 17:47:200 days 00:05:132024-05-22 17:43:000 days 00:10:000.52not bunched
32442024-05-22 18:00:560 days 00:13:362024-05-22 17:53:000 days 00:10:001.36not bunched
32452024-05-22 18:10:460 days 00:09:502024-05-22 18:03:000 days 00:10:000.98not bunched
32462024-05-22 18:25:310 days 00:14:452024-05-22 18:13:000 days 00:10:001.48not bunched
32472024-05-22 18:26:200 days 00:00:492024-05-22 18:23:000 days 00:10:000.08bunched
32482024-05-22 18:34:450 days 00:08:252024-05-22 18:34:000 days 00:11:000.77not bunched
32492024-05-22 19:03:140 days 00:28:292024-05-22 18:48:000 days 00:14:002.03not bunched
32502024-05-22 19:04:470 days 00:01:332024-05-22 19:03:000 days 00:15:000.10bunched
32512024-05-22 19:31:430 days 00:26:562024-05-22 19:22:000 days 00:19:001.42not bunched
32522024-05-22 19:34:180 days 00:02:352024-05-22 19:33:000 days 00:11:000.23bunched
32532024-05-22 19:55:090 days 00:20:512024-05-22 19:50:000 days 00:17:001.23not bunched
32542024-05-22 20:09:020 days 00:13:532024-05-22 20:04:000 days 00:14:000.99not bunched
32552024-05-22 20:29:220 days 00:20:202024-05-22 20:31:000 days 00:27:000.75not bunched
32562024-05-22 20:52:100 days 00:22:482024-05-22 20:51:000 days 00:20:001.14not bunched
32572024-05-22 21:14:080 days 00:21:582024-05-22 21:11:000 days 00:20:001.10not bunched
32582024-05-22 21:34:280 days 00:20:202024-05-22 21:31:000 days 00:20:001.02not bunched
32592024-05-22 21:49:000 days 00:14:322024-05-22 21:51:000 days 00:20:000.73not bunched
32602024-05-22 22:09:310 days 00:20:312024-05-22 22:11:000 days 00:20:001.03not bunched
32612024-05-22 22:38:180 days 00:28:472024-05-22 22:36:000 days 00:25:001.15not bunched
32622024-05-22 23:09:060 days 00:30:482024-05-22 23:02:000 days 00:26:001.18not bunched
32632024-05-22 23:34:300 days 00:25:242024-05-22 23:32:000 days 00:30:000.85not bunched
\n", + "
" + ], + "text/plain": [ + " converted_rt_arrival actual_arrival_lag converted_schd_arrival \\\n", + "3158 2024-05-22 00:07:06 NaT 2024-05-22 00:02:00 \n", + "3159 2024-05-22 00:34:01 0 days 00:26:55 2024-05-22 00:32:00 \n", + "3160 2024-05-22 01:14:46 0 days 00:40:45 2024-05-22 01:04:00 \n", + "3161 2024-05-22 01:35:45 0 days 00:20:59 2024-05-22 01:34:00 \n", + "3162 2024-05-22 02:04:52 0 days 00:29:07 2024-05-22 02:04:00 \n", + "3163 2024-05-22 02:34:57 0 days 00:30:05 2024-05-22 02:34:00 \n", + "3164 2024-05-22 03:03:09 0 days 00:28:12 2024-05-22 03:04:00 \n", + "3165 2024-05-22 03:35:53 0 days 00:32:44 2024-05-22 03:34:00 \n", + "3166 2024-05-22 04:04:36 0 days 00:28:43 2024-05-22 04:04:00 \n", + "3167 2024-05-22 04:39:50 0 days 00:35:14 2024-05-22 04:34:00 \n", + "3168 2024-05-22 05:02:08 0 days 00:22:18 2024-05-22 05:02:00 \n", + "3169 2024-05-22 05:17:54 0 days 00:15:46 2024-05-22 05:17:00 \n", + "3170 2024-05-22 05:28:20 0 days 00:10:26 2024-05-22 05:29:00 \n", + "3171 2024-05-22 05:38:08 0 days 00:09:48 2024-05-22 05:39:00 \n", + "3172 2024-05-22 05:52:07 0 days 00:13:59 2024-05-22 05:48:00 \n", + "3173 2024-05-22 05:57:54 0 days 00:05:47 2024-05-22 05:59:00 \n", + "3174 2024-05-22 06:10:57 0 days 00:13:03 2024-05-22 06:09:00 \n", + "3175 2024-05-22 06:21:06 0 days 00:10:09 2024-05-22 06:19:00 \n", + "3176 2024-05-22 06:28:44 0 days 00:07:38 2024-05-22 06:29:00 \n", + "3177 2024-05-22 06:31:18 0 days 00:02:34 2024-05-22 06:48:00 \n", + "3178 2024-05-22 06:39:24 0 days 00:08:06 2024-05-22 06:39:00 \n", + "3179 2024-05-22 07:00:58 0 days 00:21:34 2024-05-22 06:57:00 \n", + "3180 2024-05-22 07:04:29 0 days 00:03:31 2024-05-22 07:06:00 \n", + "3181 2024-05-22 07:19:54 0 days 00:15:25 2024-05-22 07:15:00 \n", + "3182 2024-05-22 07:22:20 0 days 00:02:26 2024-05-22 07:25:00 \n", + "3183 2024-05-22 07:42:16 0 days 00:19:56 2024-05-22 07:35:00 \n", + "3184 2024-05-22 07:46:30 0 days 00:04:14 2024-05-22 07:45:00 \n", + "3185 2024-05-22 07:55:58 0 days 00:09:28 
2024-05-22 07:56:00 \n", + "3186 2024-05-22 08:14:02 0 days 00:18:04 2024-05-22 08:05:00 \n", + "3187 2024-05-22 08:16:18 0 days 00:02:16 2024-05-22 08:15:00 \n", + "3188 2024-05-22 08:31:47 0 days 00:15:29 2024-05-22 08:25:00 \n", + "3189 2024-05-22 08:34:42 0 days 00:02:55 2024-05-22 08:35:00 \n", + "3190 2024-05-22 08:49:27 0 days 00:14:45 2024-05-22 08:45:00 \n", + "3191 2024-05-22 08:55:56 0 days 00:06:29 2024-05-22 08:56:00 \n", + "3192 2024-05-22 09:08:25 0 days 00:12:29 2024-05-22 09:06:00 \n", + "3193 2024-05-22 09:18:18 0 days 00:09:53 2024-05-22 09:16:00 \n", + "3194 2024-05-22 09:24:33 0 days 00:06:15 2024-05-22 09:26:00 \n", + "3195 2024-05-22 09:34:38 0 days 00:10:05 2024-05-22 09:36:00 \n", + "3196 2024-05-22 09:47:13 0 days 00:12:35 2024-05-22 09:46:00 \n", + "3197 2024-05-22 09:54:44 0 days 00:07:31 2024-05-22 09:56:00 \n", + "3198 2024-05-22 10:10:50 0 days 00:16:06 2024-05-22 10:06:00 \n", + "3199 2024-05-22 10:14:46 0 days 00:03:56 2024-05-22 10:16:00 \n", + "3200 2024-05-22 10:25:44 0 days 00:10:58 2024-05-22 10:26:00 \n", + "3201 2024-05-22 10:41:12 0 days 00:15:28 2024-05-22 10:36:00 \n", + "3202 2024-05-22 10:44:54 0 days 00:03:42 2024-05-22 10:45:00 \n", + "3203 2024-05-22 10:54:15 0 days 00:09:21 2024-05-22 10:55:00 \n", + "3204 2024-05-22 11:04:51 0 days 00:10:36 2024-05-22 11:05:00 \n", + "3205 2024-05-22 11:12:43 0 days 00:07:52 2024-05-22 11:14:00 \n", + "3206 2024-05-22 11:24:26 0 days 00:11:43 2024-05-22 11:24:00 \n", + "3207 2024-05-22 11:42:04 0 days 00:17:38 2024-05-22 11:44:00 \n", + "3208 2024-05-22 11:57:13 0 days 00:15:09 2024-05-22 11:54:00 \n", + "3209 2024-05-22 12:01:54 0 days 00:04:41 2024-05-22 12:04:00 \n", + "3210 2024-05-22 12:15:13 0 days 00:13:19 2024-05-22 12:14:00 \n", + "3211 2024-05-22 12:30:30 0 days 00:15:17 2024-05-22 12:24:00 \n", + "3212 2024-05-22 12:31:42 0 days 00:01:12 2024-05-22 12:33:00 \n", + "3213 2024-05-22 12:41:51 0 days 00:10:09 2024-05-22 12:43:00 \n", + "3214 2024-05-22 12:52:37 0 days 
00:10:46 2024-05-22 12:52:00 \n", + "3215 2024-05-22 13:04:34 0 days 00:11:57 2024-05-22 13:02:00 \n", + "3216 2024-05-22 13:10:38 0 days 00:06:04 2024-05-22 13:12:00 \n", + "3217 2024-05-22 13:22:41 0 days 00:12:03 2024-05-22 13:22:00 \n", + "3218 2024-05-22 13:34:52 0 days 00:12:11 2024-05-22 13:32:00 \n", + "3219 2024-05-22 13:40:17 0 days 00:05:25 2024-05-22 13:42:00 \n", + "3220 2024-05-22 13:54:38 0 days 00:14:21 2024-05-22 13:52:00 \n", + "3221 2024-05-22 14:03:04 0 days 00:08:26 2024-05-22 14:02:00 \n", + "3222 2024-05-22 14:13:27 0 days 00:10:23 2024-05-22 14:12:00 \n", + "3223 2024-05-22 14:25:54 0 days 00:12:27 2024-05-22 14:22:00 \n", + "3224 2024-05-22 14:34:26 0 days 00:08:32 2024-05-22 14:33:00 \n", + "3225 2024-05-22 14:43:28 0 days 00:09:02 2024-05-22 14:43:00 \n", + "3226 2024-05-22 14:53:01 0 days 00:09:33 2024-05-22 14:52:00 \n", + "3227 2024-05-22 15:05:58 0 days 00:12:57 2024-05-22 15:02:00 \n", + "3228 2024-05-22 15:12:29 0 days 00:06:31 2024-05-22 15:12:00 \n", + "3229 2024-05-22 15:24:45 0 days 00:12:16 2024-05-22 15:22:00 \n", + "3230 2024-05-22 15:34:36 0 days 00:09:51 2024-05-22 15:32:00 \n", + "3231 2024-05-22 15:43:56 0 days 00:09:20 2024-05-22 15:42:00 \n", + "3232 2024-05-22 16:00:23 0 days 00:16:27 2024-05-22 15:52:00 \n", + "3233 2024-05-22 16:07:49 0 days 00:07:26 2024-05-22 16:02:00 \n", + "3234 2024-05-22 16:16:51 0 days 00:09:02 2024-05-22 16:12:00 \n", + "3235 2024-05-22 16:24:11 0 days 00:07:20 2024-05-22 16:22:00 \n", + "3236 2024-05-22 16:38:58 0 days 00:14:47 2024-05-22 16:32:00 \n", + "3237 2024-05-22 16:43:42 0 days 00:04:44 2024-05-22 16:42:00 \n", + "3238 2024-05-22 16:52:41 0 days 00:08:59 2024-05-22 16:52:00 \n", + "3239 2024-05-22 17:07:31 0 days 00:14:50 2024-05-22 17:03:00 \n", + "3240 2024-05-22 17:13:33 0 days 00:06:02 2024-05-22 17:13:00 \n", + "3241 2024-05-22 17:27:07 0 days 00:13:34 2024-05-22 17:23:00 \n", + "3242 2024-05-22 17:42:07 0 days 00:15:00 2024-05-22 17:33:00 \n", + "3243 2024-05-22 17:47:20 0 
days 00:05:13 2024-05-22 17:43:00 \n", + "3244 2024-05-22 18:00:56 0 days 00:13:36 2024-05-22 17:53:00 \n", + "3245 2024-05-22 18:10:46 0 days 00:09:50 2024-05-22 18:03:00 \n", + "3246 2024-05-22 18:25:31 0 days 00:14:45 2024-05-22 18:13:00 \n", + "3247 2024-05-22 18:26:20 0 days 00:00:49 2024-05-22 18:23:00 \n", + "3248 2024-05-22 18:34:45 0 days 00:08:25 2024-05-22 18:34:00 \n", + "3249 2024-05-22 19:03:14 0 days 00:28:29 2024-05-22 18:48:00 \n", + "3250 2024-05-22 19:04:47 0 days 00:01:33 2024-05-22 19:03:00 \n", + "3251 2024-05-22 19:31:43 0 days 00:26:56 2024-05-22 19:22:00 \n", + "3252 2024-05-22 19:34:18 0 days 00:02:35 2024-05-22 19:33:00 \n", + "3253 2024-05-22 19:55:09 0 days 00:20:51 2024-05-22 19:50:00 \n", + "3254 2024-05-22 20:09:02 0 days 00:13:53 2024-05-22 20:04:00 \n", + "3255 2024-05-22 20:29:22 0 days 00:20:20 2024-05-22 20:31:00 \n", + "3256 2024-05-22 20:52:10 0 days 00:22:48 2024-05-22 20:51:00 \n", + "3257 2024-05-22 21:14:08 0 days 00:21:58 2024-05-22 21:11:00 \n", + "3258 2024-05-22 21:34:28 0 days 00:20:20 2024-05-22 21:31:00 \n", + "3259 2024-05-22 21:49:00 0 days 00:14:32 2024-05-22 21:51:00 \n", + "3260 2024-05-22 22:09:31 0 days 00:20:31 2024-05-22 22:11:00 \n", + "3261 2024-05-22 22:38:18 0 days 00:28:47 2024-05-22 22:36:00 \n", + "3262 2024-05-22 23:09:06 0 days 00:30:48 2024-05-22 23:02:00 \n", + "3263 2024-05-22 23:34:30 0 days 00:25:24 2024-05-22 23:32:00 \n", + "\n", + " scheduled_arrival_lag pct_actual_schd_headway bunched_y_n \n", + "3158 NaT NaN not bunched \n", + "3159 0 days 00:30:00 0.90 not bunched \n", + "3160 0 days 00:32:00 1.27 not bunched \n", + "3161 0 days 00:30:00 0.70 not bunched \n", + "3162 0 days 00:30:00 0.97 not bunched \n", + "3163 0 days 00:30:00 1.00 not bunched \n", + "3164 0 days 00:30:00 0.94 not bunched \n", + "3165 0 days 00:30:00 1.09 not bunched \n", + "3166 0 days 00:30:00 0.96 not bunched \n", + "3167 0 days 00:30:00 1.17 not bunched \n", + "3168 0 days 00:28:00 0.80 not bunched \n", + "3169 0 
days 00:15:00 1.05 not bunched \n", + "3170 0 days 00:12:00 0.87 not bunched \n", + "3171 0 days 00:10:00 0.98 not bunched \n", + "3172 0 days 00:09:00 1.55 not bunched \n", + "3173 0 days 00:11:00 0.53 not bunched \n", + "3174 0 days 00:10:00 1.30 not bunched \n", + "3175 0 days 00:10:00 1.01 not bunched \n", + "3176 0 days 00:10:00 0.76 not bunched \n", + "3177 0 days 00:19:00 0.14 bunched \n", + "3178 -1 days +23:51:00 -0.90 bunched \n", + "3179 0 days 00:18:00 1.20 not bunched \n", + "3180 0 days 00:09:00 0.39 not bunched \n", + "3181 0 days 00:09:00 1.71 not bunched \n", + "3182 0 days 00:10:00 0.24 bunched \n", + "3183 0 days 00:10:00 1.99 not bunched \n", + "3184 0 days 00:10:00 0.42 not bunched \n", + "3185 0 days 00:11:00 0.86 not bunched \n", + "3186 0 days 00:09:00 2.01 not bunched \n", + "3187 0 days 00:10:00 0.23 bunched \n", + "3188 0 days 00:10:00 1.55 not bunched \n", + "3189 0 days 00:10:00 0.29 not bunched \n", + "3190 0 days 00:10:00 1.48 not bunched \n", + "3191 0 days 00:11:00 0.59 not bunched \n", + "3192 0 days 00:10:00 1.25 not bunched \n", + "3193 0 days 00:10:00 0.99 not bunched \n", + "3194 0 days 00:10:00 0.62 not bunched \n", + "3195 0 days 00:10:00 1.01 not bunched \n", + "3196 0 days 00:10:00 1.26 not bunched \n", + "3197 0 days 00:10:00 0.75 not bunched \n", + "3198 0 days 00:10:00 1.61 not bunched \n", + "3199 0 days 00:10:00 0.39 not bunched \n", + "3200 0 days 00:10:00 1.10 not bunched \n", + "3201 0 days 00:10:00 1.55 not bunched \n", + "3202 0 days 00:09:00 0.41 not bunched \n", + "3203 0 days 00:10:00 0.94 not bunched \n", + "3204 0 days 00:10:00 1.06 not bunched \n", + "3205 0 days 00:09:00 0.87 not bunched \n", + "3206 0 days 00:10:00 1.17 not bunched \n", + "3207 0 days 00:20:00 0.88 not bunched \n", + "3208 0 days 00:10:00 1.51 not bunched \n", + "3209 0 days 00:10:00 0.47 not bunched \n", + "3210 0 days 00:10:00 1.33 not bunched \n", + "3211 0 days 00:10:00 1.53 not bunched \n", + "3212 0 days 00:09:00 0.13 bunched \n", + 
"3213 0 days 00:10:00 1.01 not bunched \n", + "3214 0 days 00:09:00 1.20 not bunched \n", + "3215 0 days 00:10:00 1.20 not bunched \n", + "3216 0 days 00:10:00 0.61 not bunched \n", + "3217 0 days 00:10:00 1.21 not bunched \n", + "3218 0 days 00:10:00 1.22 not bunched \n", + "3219 0 days 00:10:00 0.54 not bunched \n", + "3220 0 days 00:10:00 1.44 not bunched \n", + "3221 0 days 00:10:00 0.84 not bunched \n", + "3222 0 days 00:10:00 1.04 not bunched \n", + "3223 0 days 00:10:00 1.25 not bunched \n", + "3224 0 days 00:11:00 0.78 not bunched \n", + "3225 0 days 00:10:00 0.90 not bunched \n", + "3226 0 days 00:09:00 1.06 not bunched \n", + "3227 0 days 00:10:00 1.29 not bunched \n", + "3228 0 days 00:10:00 0.65 not bunched \n", + "3229 0 days 00:10:00 1.23 not bunched \n", + "3230 0 days 00:10:00 0.98 not bunched \n", + "3231 0 days 00:10:00 0.93 not bunched \n", + "3232 0 days 00:10:00 1.65 not bunched \n", + "3233 0 days 00:10:00 0.74 not bunched \n", + "3234 0 days 00:10:00 0.90 not bunched \n", + "3235 0 days 00:10:00 0.73 not bunched \n", + "3236 0 days 00:10:00 1.48 not bunched \n", + "3237 0 days 00:10:00 0.47 not bunched \n", + "3238 0 days 00:10:00 0.90 not bunched \n", + "3239 0 days 00:11:00 1.35 not bunched \n", + "3240 0 days 00:10:00 0.60 not bunched \n", + "3241 0 days 00:10:00 1.36 not bunched \n", + "3242 0 days 00:10:00 1.50 not bunched \n", + "3243 0 days 00:10:00 0.52 not bunched \n", + "3244 0 days 00:10:00 1.36 not bunched \n", + "3245 0 days 00:10:00 0.98 not bunched \n", + "3246 0 days 00:10:00 1.48 not bunched \n", + "3247 0 days 00:10:00 0.08 bunched \n", + "3248 0 days 00:11:00 0.77 not bunched \n", + "3249 0 days 00:14:00 2.03 not bunched \n", + "3250 0 days 00:15:00 0.10 bunched \n", + "3251 0 days 00:19:00 1.42 not bunched \n", + "3252 0 days 00:11:00 0.23 bunched \n", + "3253 0 days 00:17:00 1.23 not bunched \n", + "3254 0 days 00:14:00 0.99 not bunched \n", + "3255 0 days 00:27:00 0.75 not bunched \n", + "3256 0 days 00:20:00 1.14 not 
bunched \n", + "3257 0 days 00:20:00 1.10 not bunched \n", + "3258 0 days 00:20:00 1.02 not bunched \n", + "3259 0 days 00:20:00 0.73 not bunched \n", + "3260 0 days 00:20:00 1.03 not bunched \n", + "3261 0 days 00:25:00 1.15 not bunched \n", + "3262 0 days 00:26:00 1.18 not bunched \n", + "3263 0 days 00:30:00 0.85 not bunched " + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "example1[\n", + " [\n", + " \"converted_rt_arrival\",\n", + " \"actual_arrival_lag\",\n", + " \"converted_schd_arrival\",\n", + " \"scheduled_arrival_lag\",\n", + " \"pct_actual_schd_headway\",\n", + " \"bunched_y_n\",\n", + " ]\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "db10254b-d5d4-4619-9c6e-10fab19ec6b2", + "metadata": {}, + "source": [ + "#### Groupby grain and see how many trips for that grain are considered \"bunched\" or not.\n", + "* Take out `stop_sequence`?" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "6f94980e-b9c4-4c1e-a497-ea4b3f88c55b", + "metadata": {}, + "outputs": [], + "source": [ + "transit_matters_agg = [\n", + " \"caltrans_district\",\n", + " \"organization_name\",\n", + " \"route_long_name\",\n", + " \"route_type_str\",\n", + " \"shape_array_key\",\n", + " \"route_id\",\n", + " \"stop_id\",\n", + " \"route_primary_direction\",\n", + " \"bunched_y_n\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "e72f4e50-da82-4a59-8aae-fc6b79f91cc5", + "metadata": {}, + "outputs": [], + "source": [ + "transit_matters_df2 = (\n", + " transit_matters_df1.groupby(transit_matters_agg)\n", + " .agg({\"trip_instance_key\": \"nunique\"})\n", + " .reset_index()\n", + ").rename(columns = {\"trip_instance_key\":\"all_trips\"} )" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "id": "ebf0e403-89c5-4b2c-b962-44affe3c0b42", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter out only rows that are bunched.\n", + "bunched_only = 
transit_matters_df2.loc[\n", + " transit_matters_df2.bunched_y_n == \"bunched\"\n", + "].reset_index(drop=True).drop(columns = [\"bunched_y_n\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "22eb6ee2-e1ac-4b31-8363-42003d03538d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "730" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(bunched_only)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "7591762b-abab-4083-8b42-53dca0523fca", + "metadata": {}, + "outputs": [], + "source": [ + "bunched_only = bunched_only.rename(columns={\"all_trips\": \"bunched_trips\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "42172962-6346-4bb1-bcbf-4101468dce4a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caltrans_districtorganization_nameroute_long_nameroute_type_strshape_array_keyroute_idstop_idroute_primary_directionbunched_trips
003 - MarysvilleYolo County Transportation DistrictROUTE 215 WBBuse939d633652e2af6d3aa82d28a042dbf07959480-2a40-4a51-92ac-8ca2029d5f4fa1d203a6-cfdd-40a1-af2f-0fa502ea65b4Westbound1
103 - MarysvilleYolo County Transportation DistrictROUTE 215 WBBuse939d633652e2af6d3aa82d28a042dbf07959480-2a40-4a51-92ac-8ca2029d5f4fc8a9bfc8-7e84-483b-95bc-02a1494c3ae3Westbound1
\n", + "
" + ], + "text/plain": [ + " caltrans_district organization_name route_long_name \\\n", + "0 03 - Marysville Yolo County Transportation District ROUTE 215 WB \n", + "1 03 - Marysville Yolo County Transportation District ROUTE 215 WB \n", + "\n", + " route_type_str shape_array_key \\\n", + "0 Bus e939d633652e2af6d3aa82d28a042dbf \n", + "1 Bus e939d633652e2af6d3aa82d28a042dbf \n", + "\n", + " route_id stop_id \\\n", + "0 07959480-2a40-4a51-92ac-8ca2029d5f4f a1d203a6-cfdd-40a1-af2f-0fa502ea65b4 \n", + "1 07959480-2a40-4a51-92ac-8ca2029d5f4f c8a9bfc8-7e84-483b-95bc-02a1494c3ae3 \n", + "\n", + " route_primary_direction bunched_trips \n", + "0 Westbound 1 \n", + "1 Westbound 1 " + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bunched_only.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "a4a6e1c8-5ac4-47f2-996e-2c0ed2f13c80", + "metadata": {}, + "outputs": [], + "source": [ + "transit_matters_df2 = transit_matters_df2.drop(columns = [\"bunched_y_n\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "ed446323-884f-46ba-b546-8d59a5c7a6f9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caltrans_districtorganization_nameroute_long_nameroute_type_strshape_array_keyroute_idstop_idroute_primary_directionall_trips
001 - EurekaCity of EurekaAMRTS Gold RouteBusc47c15ffc43da6e556ff913272778e4d141262Northbound11
101 - EurekaCity of EurekaAMRTS Gold RouteBusc47c15ffc43da6e556ff913272778e4d141264Northbound11
\n", + "
" + ], + "text/plain": [ + " caltrans_district organization_name route_long_name route_type_str \\\n", + "0 01 - Eureka City of Eureka AMRTS Gold Route Bus \n", + "1 01 - Eureka City of Eureka AMRTS Gold Route Bus \n", + "\n", + " shape_array_key route_id stop_id route_primary_direction \\\n", + "0 c47c15ffc43da6e556ff913272778e4d 14 1262 Northbound \n", + "1 c47c15ffc43da6e556ff913272778e4d 14 1264 Northbound \n", + "\n", + " all_trips \n", + "0 11 \n", + "1 11 " + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transit_matters_df2.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "77483529-0602-4614-b70f-6c19a13f00f6", + "metadata": {}, + "outputs": [], + "source": [ + "transit_matters_agg.remove(\"bunched_y_n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "a949add8-c4ab-4e09-b7e1-e53e6be07bf1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['caltrans_district',\n", + " 'organization_name',\n", + " 'route_long_name',\n", + " 'route_type_str',\n", + " 'shape_array_key',\n", + " 'route_id',\n", + " 'stop_id',\n", + " 'route_primary_direction']" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transit_matters_agg" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "2f57e136-fc83-4eac-bc05-84f530e2f4b0", + "metadata": {}, + "outputs": [], + "source": [ + "# Merge back, using left merge to keep bunching to find % of bunched trips\n", + "transit_matters_m1 = pd.merge(\n", + " transit_matters_df2, bunched_only, on=transit_matters_agg, how=\"outer\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "6f4c4386-acfb-483a-947d-0279710df61b", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caltrans_districtorganization_nameroute_long_nameroute_type_strshape_array_keyroute_idstop_idroute_primary_directionall_tripsbunched_trips
413107 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus6a10ede3fa469c8b4d9bf761946ed20a204-131725637Northbound88.00
413207 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus6a10ede3fa469c8b4d9bf761946ed20a204-131725637Northbound988.00
\n", + "
" + ], + "text/plain": [ + " caltrans_district \\\n", + "4131 07 - Los Angeles \n", + "4132 07 - Los Angeles \n", + "\n", + " organization_name \\\n", + "4131 Los Angeles County Metropolitan Transportation Authority \n", + "4132 Los Angeles County Metropolitan Transportation Authority \n", + "\n", + " route_long_name route_type_str shape_array_key \\\n", + "4131 Metro Local Line Bus 6a10ede3fa469c8b4d9bf761946ed20a \n", + "4132 Metro Local Line Bus 6a10ede3fa469c8b4d9bf761946ed20a \n", + "\n", + " route_id stop_id route_primary_direction all_trips bunched_trips \n", + "4131 204-13172 5637 Northbound 8 8.00 \n", + "4132 204-13172 5637 Northbound 98 8.00 " + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transit_matters_m1.loc[\n", + " (transit_matters_m1.stop_id == \"5637\")\n", + " & (\n", + " transit_matters_m1.organization_name\n", + " == \"Los Angeles County Metropolitan Transportation Authority\"\n", + " )\n", + " & (transit_matters_m1.route_id == \"204-13172\")\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "5a4c017b-7a0f-4c2c-8999-dad006c8cab3", + "metadata": {}, + "outputs": [], + "source": [ + "transit_matters_m2 = transit_matters_m1.sort_values(by = [\"all_trips\"], ascending = False).drop_duplicates(subset = transit_matters_agg).reset_index(drop = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "d383e8e6-2ad1-452d-9b4c-ff5300cad764", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caltrans_districtorganization_nameroute_long_nameroute_type_strshape_array_keyroute_idstop_idroute_primary_directionall_tripsbunched_trips
7107 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus6a10ede3fa469c8b4d9bf761946ed20a204-131725637Northbound988.00
\n", + "
" + ], + "text/plain": [ + " caltrans_district \\\n", + "71 07 - Los Angeles \n", + "\n", + " organization_name \\\n", + "71 Los Angeles County Metropolitan Transportation Authority \n", + "\n", + " route_long_name route_type_str shape_array_key \\\n", + "71 Metro Local Line Bus 6a10ede3fa469c8b4d9bf761946ed20a \n", + "\n", + " route_id stop_id route_primary_direction all_trips bunched_trips \n", + "71 204-13172 5637 Northbound 98 8.00 " + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transit_matters_m2.loc[\n", + " (transit_matters_m2.stop_id == \"5637\")\n", + " & (\n", + " transit_matters_m2.organization_name\n", + " == \"Los Angeles County Metropolitan Transportation Authority\"\n", + " )\n", + " & (transit_matters_m2.route_id == \"204-13172\")\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "55b8feb0-1e0a-4717-b966-ee360becfde5", + "metadata": {}, + "outputs": [], + "source": [ + "transit_matters_m2[\"pct_trips_bunched\"] = (\n", + " transit_matters_m2.bunched_trips / transit_matters_m2.all_trips * 100\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "4a25225a-7883-457f-a27a-f606164cdd37", + "metadata": {}, + "outputs": [], + "source": [ + "transit_matters_m2.pct_trips_bunched = transit_matters_m2.pct_trips_bunched.fillna(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "id": "2e2abda3-b3de-4f03-baf4-7f764dd10255", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 5871.00\n", + "mean 0.64\n", + "std 2.91\n", + "min 0.00\n", + "1% 0.00\n", + "2% 0.00\n", + "5% 0.00\n", + "10% 0.00\n", + "50% 0.00\n", + "90% 1.79\n", + "95% 4.04\n", + "98% 8.47\n", + "99% 11.54\n", + "max 100.00\n", + "Name: pct_trips_bunched, dtype: float64" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"transit_matters_m2.pct_trips_bunched.describe(percentiles)" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "id": "086eace2-81df-46fa-9bd4-c6aa5ea3db79", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caltrans_districtorganization_nameroute_long_nameroute_type_strshape_array_keyroute_idstop_idroute_primary_directionall_tripsbunched_tripspct_trips_bunched
559904 - OaklandPeninsula Corridor Joint Powers BoardLocalRail8c4de04e7398d418c12cc1541651e951L170022Eastbound11.00100.00
558204 - OaklandPeninsula Corridor Joint Powers BoardLocalRail8c4de04e7398d418c12cc1541651e951L170032Eastbound11.00100.00
72804 - OaklandEmeryville Transportation Management AgencyHollisBus0628e405f93c2d0b5e3e68a7115857d4Hollis855335Northbound4412.0027.27
436207 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus0688a14c97a2ebfe90f5674c1262d741217-13172104720Southbound41.0025.00
435607 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus0688a14c97a2ebfe90f5674c1262d741217-1317211013Southbound41.0025.00
433707 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus0688a14c97a2ebfe90f5674c1262d741217-1317211031Southbound41.0025.00
291106 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307469Westbound133.0023.08
258806 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307692Westbound153.0020.00
263106 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307698Westbound153.0020.00
262506 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307716Westbound153.0020.00
261106 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307718Westbound153.0020.00
260006 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307356Westbound153.0020.00
259906 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307466Westbound153.0020.00
259706 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307467Westbound153.0020.00
259606 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307695Westbound153.0020.00
259306 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307686Westbound153.0020.00
259206 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307687Westbound153.0020.00
259106 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307689Westbound153.0020.00
259006 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307690Westbound153.0020.00
258706 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307693Westbound153.0020.00
262706 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307699Westbound153.0020.00
260606 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307355Westbound153.0020.00
258906 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307691Westbound153.0020.00
258306 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307468Westbound153.0020.00
262406 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307696Westbound153.0020.00
263306 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307697Westbound153.0020.00
262306 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307717Westbound153.0020.00
262206 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307719Westbound153.0020.00
428405 - San Luis ObispoSanta Cruz Metropolitan Transit DistrictRiver/Harvey West/EmelineBus56f57a645c3aff1c35f50d5ea1385a7541216Eastbound51.0020.00
258506 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307694Westbound153.0020.00
262106 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307720Westbound153.0020.00
262006 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420423869709Westbound153.0020.00
261906 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420423869710Westbound153.0020.00
262606 - FresnoCity of VisaliaRoute 9Bus60da59c7000ea5dcb5f845d8fa227f1420422307700Westbound153.0020.00
414303 - MarysvilleYolo County Transportation DistrictROUTE 215 WBBuse939d633652e2af6d3aa82d28a042dbf07959480-2a40-4a51-92ac-8ca2029d5f4fa1d203a6-cfdd-40a1-af2f-0fa502ea65b4Westbound61.0016.67
26907 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus4e1e59e768bfbf8c30d0369f89e27aa6603-131728066Northbound6710.0014.93
26707 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus4e1e59e768bfbf8c30d0369f89e27aa6603-131723362Northbound6710.0014.93
386605 - San Luis ObispoSanta Cruz Metropolitan Transit DistrictRiver/Harvey West/EmelineBus56f57a645c3aff1c35f50d5ea1385a7541214Eastbound71.0014.29
386705 - San Luis ObispoSanta Cruz Metropolitan Transit DistrictRiver/Harvey West/EmelineBus56f57a645c3aff1c35f50d5ea1385a7541215Eastbound71.0014.29
9907 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus6a10ede3fa469c8b4d9bf761946ed20a204-131723963Northbound9213.0014.13
9807 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus6a10ede3fa469c8b4d9bf761946ed20a204-131727092Northbound9213.0014.13
72607 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus2e0e2720bd3b6c1510ab780896264ae4206-1317214032Southbound446.0013.64
71607 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus2e0e2720bd3b6c1510ab780896264ae4206-1317214027Southbound456.0013.33
9707 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus6a10ede3fa469c8b4d9bf761946ed20a204-131725652Northbound9312.0012.90
364405 - San Luis ObispoSanta Cruz Metropolitan Transit DistrictUCSC/Capitola Mall/Live OakBus9b4a79b5b21e2ca01ee032b1c39ca3c53B1388Eastbound81.0012.50
362503 - MarysvilleYolo County Transportation DistrictROUTE 215 WBBuse939d633652e2af6d3aa82d28a042dbf07959480-2a40-4a51-92ac-8ca2029d5f4fc8a9bfc8-7e84-483b-95bc-02a1494c3ae3Westbound81.0012.50
26807 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus4e1e59e768bfbf8c30d0369f89e27aa6603-131728689Northbound678.0011.94
57607 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus2e0e2720bd3b6c1510ab780896264ae4206-1317214078Southbound516.0011.76
26207 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus4e1e59e768bfbf8c30d0369f89e27aa6603-131722203Northbound688.0011.76
59207 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus2e0e2720bd3b6c1510ab780896264ae4206-1317215437Southbound516.0011.76
\n", + "
" + ], + "text/plain": [ + " caltrans_district \\\n", + "5599 04 - Oakland \n", + "5582 04 - Oakland \n", + "728 04 - Oakland \n", + "4362 07 - Los Angeles \n", + "4356 07 - Los Angeles \n", + "4337 07 - Los Angeles \n", + "2911 06 - Fresno \n", + "2588 06 - Fresno \n", + "2631 06 - Fresno \n", + "2625 06 - Fresno \n", + "2611 06 - Fresno \n", + "2600 06 - Fresno \n", + "2599 06 - Fresno \n", + "2597 06 - Fresno \n", + "2596 06 - Fresno \n", + "2593 06 - Fresno \n", + "2592 06 - Fresno \n", + "2591 06 - Fresno \n", + "2590 06 - Fresno \n", + "2587 06 - Fresno \n", + "2627 06 - Fresno \n", + "2606 06 - Fresno \n", + "2589 06 - Fresno \n", + "2583 06 - Fresno \n", + "2624 06 - Fresno \n", + "2633 06 - Fresno \n", + "2623 06 - Fresno \n", + "2622 06 - Fresno \n", + "4284 05 - San Luis Obispo \n", + "2585 06 - Fresno \n", + "2621 06 - Fresno \n", + "2620 06 - Fresno \n", + "2619 06 - Fresno \n", + "2626 06 - Fresno \n", + "4143 03 - Marysville \n", + "269 07 - Los Angeles \n", + "267 07 - Los Angeles \n", + "3866 05 - San Luis Obispo \n", + "3867 05 - San Luis Obispo \n", + "99 07 - Los Angeles \n", + "98 07 - Los Angeles \n", + "726 07 - Los Angeles \n", + "716 07 - Los Angeles \n", + "97 07 - Los Angeles \n", + "3644 05 - San Luis Obispo \n", + "3625 03 - Marysville \n", + "268 07 - Los Angeles \n", + "576 07 - Los Angeles \n", + "262 07 - Los Angeles \n", + "592 07 - Los Angeles \n", + "\n", + " organization_name \\\n", + "5599 Peninsula Corridor Joint Powers Board \n", + "5582 Peninsula Corridor Joint Powers Board \n", + "728 Emeryville Transportation Management Agency \n", + "4362 Los Angeles County Metropolitan Transportation Authority \n", + "4356 Los Angeles County Metropolitan Transportation Authority \n", + "4337 Los Angeles County Metropolitan Transportation Authority \n", + "2911 City of Visalia \n", + "2588 City of Visalia \n", + "2631 City of Visalia \n", + "2625 City of Visalia \n", + "2611 City of Visalia \n", + "2600 City of Visalia \n", + "2599 City 
of Visalia \n", + "2597 City of Visalia \n", + "2596 City of Visalia \n", + "2593 City of Visalia \n", + "2592 City of Visalia \n", + "2591 City of Visalia \n", + "2590 City of Visalia \n", + "2587 City of Visalia \n", + "2627 City of Visalia \n", + "2606 City of Visalia \n", + "2589 City of Visalia \n", + "2583 City of Visalia \n", + "2624 City of Visalia \n", + "2633 City of Visalia \n", + "2623 City of Visalia \n", + "2622 City of Visalia \n", + "4284 Santa Cruz Metropolitan Transit District \n", + "2585 City of Visalia \n", + "2621 City of Visalia \n", + "2620 City of Visalia \n", + "2619 City of Visalia \n", + "2626 City of Visalia \n", + "4143 Yolo County Transportation District \n", + "269 Los Angeles County Metropolitan Transportation Authority \n", + "267 Los Angeles County Metropolitan Transportation Authority \n", + "3866 Santa Cruz Metropolitan Transit District \n", + "3867 Santa Cruz Metropolitan Transit District \n", + "99 Los Angeles County Metropolitan Transportation Authority \n", + "98 Los Angeles County Metropolitan Transportation Authority \n", + "726 Los Angeles County Metropolitan Transportation Authority \n", + "716 Los Angeles County Metropolitan Transportation Authority \n", + "97 Los Angeles County Metropolitan Transportation Authority \n", + "3644 Santa Cruz Metropolitan Transit District \n", + "3625 Yolo County Transportation District \n", + "268 Los Angeles County Metropolitan Transportation Authority \n", + "576 Los Angeles County Metropolitan Transportation Authority \n", + "262 Los Angeles County Metropolitan Transportation Authority \n", + "592 Los Angeles County Metropolitan Transportation Authority \n", + "\n", + " route_long_name route_type_str \\\n", + "5599 Local Rail \n", + "5582 Local Rail \n", + "728 Hollis Bus \n", + "4362 Metro Local Line Bus \n", + "4356 Metro Local Line Bus \n", + "4337 Metro Local Line Bus \n", + "2911 Route 9 Bus \n", + "2588 Route 9 Bus \n", + "2631 Route 9 Bus \n", + "2625 Route 9 Bus \n", + "2611 
Route 9 Bus \n", + "2600 Route 9 Bus \n", + "2599 Route 9 Bus \n", + "2597 Route 9 Bus \n", + "2596 Route 9 Bus \n", + "2593 Route 9 Bus \n", + "2592 Route 9 Bus \n", + "2591 Route 9 Bus \n", + "2590 Route 9 Bus \n", + "2587 Route 9 Bus \n", + "2627 Route 9 Bus \n", + "2606 Route 9 Bus \n", + "2589 Route 9 Bus \n", + "2583 Route 9 Bus \n", + "2624 Route 9 Bus \n", + "2633 Route 9 Bus \n", + "2623 Route 9 Bus \n", + "2622 Route 9 Bus \n", + "4284 River/Harvey West/Emeline Bus \n", + "2585 Route 9 Bus \n", + "2621 Route 9 Bus \n", + "2620 Route 9 Bus \n", + "2619 Route 9 Bus \n", + "2626 Route 9 Bus \n", + "4143 ROUTE 215 WB Bus \n", + "269 Metro Local Line Bus \n", + "267 Metro Local Line Bus \n", + "3866 River/Harvey West/Emeline Bus \n", + "3867 River/Harvey West/Emeline Bus \n", + "99 Metro Local Line Bus \n", + "98 Metro Local Line Bus \n", + "726 Metro Local Line Bus \n", + "716 Metro Local Line Bus \n", + "97 Metro Local Line Bus \n", + "3644 UCSC/Capitola Mall/Live Oak Bus \n", + "3625 ROUTE 215 WB Bus \n", + "268 Metro Local Line Bus \n", + "576 Metro Local Line Bus \n", + "262 Metro Local Line Bus \n", + "592 Metro Local Line Bus \n", + "\n", + " shape_array_key route_id \\\n", + "5599 8c4de04e7398d418c12cc1541651e951 L1 \n", + "5582 8c4de04e7398d418c12cc1541651e951 L1 \n", + "728 0628e405f93c2d0b5e3e68a7115857d4 Hollis \n", + "4362 0688a14c97a2ebfe90f5674c1262d741 217-13172 \n", + "4356 0688a14c97a2ebfe90f5674c1262d741 217-13172 \n", + "4337 0688a14c97a2ebfe90f5674c1262d741 217-13172 \n", + "2911 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2588 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2631 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2625 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2611 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2600 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2599 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2597 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2596 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2593 
60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2592 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2591 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2590 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2587 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2627 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2606 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2589 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2583 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2624 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2633 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2623 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2622 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "4284 56f57a645c3aff1c35f50d5ea1385a75 4 \n", + "2585 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2621 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2620 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2619 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "2626 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", + "4143 e939d633652e2af6d3aa82d28a042dbf 07959480-2a40-4a51-92ac-8ca2029d5f4f \n", + "269 4e1e59e768bfbf8c30d0369f89e27aa6 603-13172 \n", + "267 4e1e59e768bfbf8c30d0369f89e27aa6 603-13172 \n", + "3866 56f57a645c3aff1c35f50d5ea1385a75 4 \n", + "3867 56f57a645c3aff1c35f50d5ea1385a75 4 \n", + "99 6a10ede3fa469c8b4d9bf761946ed20a 204-13172 \n", + "98 6a10ede3fa469c8b4d9bf761946ed20a 204-13172 \n", + "726 2e0e2720bd3b6c1510ab780896264ae4 206-13172 \n", + "716 2e0e2720bd3b6c1510ab780896264ae4 206-13172 \n", + "97 6a10ede3fa469c8b4d9bf761946ed20a 204-13172 \n", + "3644 9b4a79b5b21e2ca01ee032b1c39ca3c5 3B \n", + "3625 e939d633652e2af6d3aa82d28a042dbf 07959480-2a40-4a51-92ac-8ca2029d5f4f \n", + "268 4e1e59e768bfbf8c30d0369f89e27aa6 603-13172 \n", + "576 2e0e2720bd3b6c1510ab780896264ae4 206-13172 \n", + "262 4e1e59e768bfbf8c30d0369f89e27aa6 603-13172 \n", + "592 2e0e2720bd3b6c1510ab780896264ae4 206-13172 \n", + "\n", + " stop_id route_primary_direction all_trips \\\n", + "5599 70022 Eastbound 1 \n", + "5582 70032 Eastbound 1 \n", + 
"728 855335 Northbound 44 \n", + "4362 104720 Southbound 4 \n", + "4356 11013 Southbound 4 \n", + "4337 11031 Southbound 4 \n", + "2911 2307469 Westbound 13 \n", + "2588 2307692 Westbound 15 \n", + "2631 2307698 Westbound 15 \n", + "2625 2307716 Westbound 15 \n", + "2611 2307718 Westbound 15 \n", + "2600 2307356 Westbound 15 \n", + "2599 2307466 Westbound 15 \n", + "2597 2307467 Westbound 15 \n", + "2596 2307695 Westbound 15 \n", + "2593 2307686 Westbound 15 \n", + "2592 2307687 Westbound 15 \n", + "2591 2307689 Westbound 15 \n", + "2590 2307690 Westbound 15 \n", + "2587 2307693 Westbound 15 \n", + "2627 2307699 Westbound 15 \n", + "2606 2307355 Westbound 15 \n", + "2589 2307691 Westbound 15 \n", + "2583 2307468 Westbound 15 \n", + "2624 2307696 Westbound 15 \n", + "2633 2307697 Westbound 15 \n", + "2623 2307717 Westbound 15 \n", + "2622 2307719 Westbound 15 \n", + "4284 1216 Eastbound 5 \n", + "2585 2307694 Westbound 15 \n", + "2621 2307720 Westbound 15 \n", + "2620 3869709 Westbound 15 \n", + "2619 3869710 Westbound 15 \n", + "2626 2307700 Westbound 15 \n", + "4143 a1d203a6-cfdd-40a1-af2f-0fa502ea65b4 Westbound 6 \n", + "269 8066 Northbound 67 \n", + "267 3362 Northbound 67 \n", + "3866 1214 Eastbound 7 \n", + "3867 1215 Eastbound 7 \n", + "99 3963 Northbound 92 \n", + "98 7092 Northbound 92 \n", + "726 14032 Southbound 44 \n", + "716 14027 Southbound 45 \n", + "97 5652 Northbound 93 \n", + "3644 1388 Eastbound 8 \n", + "3625 c8a9bfc8-7e84-483b-95bc-02a1494c3ae3 Westbound 8 \n", + "268 8689 Northbound 67 \n", + "576 14078 Southbound 51 \n", + "262 2203 Northbound 68 \n", + "592 15437 Southbound 51 \n", + "\n", + " bunched_trips pct_trips_bunched \n", + "5599 1.00 100.00 \n", + "5582 1.00 100.00 \n", + "728 12.00 27.27 \n", + "4362 1.00 25.00 \n", + "4356 1.00 25.00 \n", + "4337 1.00 25.00 \n", + "2911 3.00 23.08 \n", + "2588 3.00 20.00 \n", + "2631 3.00 20.00 \n", + "2625 3.00 20.00 \n", + "2611 3.00 20.00 \n", + "2600 3.00 20.00 \n", + "2599 3.00 20.00 \n", + 
"2597 3.00 20.00 \n", + "2596 3.00 20.00 \n", + "2593 3.00 20.00 \n", + "2592 3.00 20.00 \n", + "2591 3.00 20.00 \n", + "2590 3.00 20.00 \n", + "2587 3.00 20.00 \n", + "2627 3.00 20.00 \n", + "2606 3.00 20.00 \n", + "2589 3.00 20.00 \n", + "2583 3.00 20.00 \n", + "2624 3.00 20.00 \n", + "2633 3.00 20.00 \n", + "2623 3.00 20.00 \n", + "2622 3.00 20.00 \n", + "4284 1.00 20.00 \n", + "2585 3.00 20.00 \n", + "2621 3.00 20.00 \n", + "2620 3.00 20.00 \n", + "2619 3.00 20.00 \n", + "2626 3.00 20.00 \n", + "4143 1.00 16.67 \n", + "269 10.00 14.93 \n", + "267 10.00 14.93 \n", + "3866 1.00 14.29 \n", + "3867 1.00 14.29 \n", + "99 13.00 14.13 \n", + "98 13.00 14.13 \n", + "726 6.00 13.64 \n", + "716 6.00 13.33 \n", + "97 12.00 12.90 \n", + "3644 1.00 12.50 \n", + "3625 1.00 12.50 \n", + "268 8.00 11.94 \n", + "576 6.00 11.76 \n", + "262 8.00 11.76 \n", + "592 6.00 11.76 " + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transit_matters_m2.sort_values(by = [\"pct_trips_bunched\"], ascending = False).head(50)" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "id": "184ea397-489e-49b1-a325-aa3ee74f5e38", + "metadata": {}, + "outputs": [], + "source": [ + "example2 = transit_matters_df1.loc[\n", + " (transit_matters_df1.stop_id == \"2307719\")\n", + " & (\n", + " transit_matters_df1.organization_name\n", + " == \"City of Visalia\"\n", + " )\n", + " & (transit_matters_df1.route_id == \"2042\")\n", + " & (transit_matters_df1.shape_array_key == \"60da59c7000ea5dcb5f845d8fa227f14\")\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "id": "8b34f0a6-6a2b-4e56-b0fe-bd0f57b53db8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
converted_rt_arrivalactual_arrival_lagconverted_schd_arrivalscheduled_arrival_lagpct_actual_schd_headwaybunched_y_n
913662024-05-22 06:32:47NaT2024-05-22 06:42:00NaTNaNnot bunched
913672024-05-22 07:26:410 days 00:53:542024-05-22 07:27:000 days 00:45:001.20not bunched
913682024-05-22 08:06:110 days 00:39:302024-05-22 08:12:000 days 00:45:000.88not bunched
913692024-05-22 08:56:570 days 00:50:462024-05-22 08:57:000 days 00:45:001.13not bunched
913702024-05-22 09:37:590 days 00:41:022024-05-22 09:42:000 days 00:45:000.91not bunched
913712024-05-22 10:27:260 days 00:49:272024-05-22 10:27:000 days 00:45:001.10not bunched
913722024-05-22 11:10:050 days 00:42:392024-05-22 11:12:000 days 00:45:000.95not bunched
913732024-05-22 12:01:010 days 00:50:562024-05-22 11:57:000 days 00:45:001.13not bunched
913742024-05-22 12:38:080 days 00:37:072024-05-22 12:42:000 days 00:45:000.82not bunched
913752024-05-22 13:27:100 days 00:49:022024-05-22 13:27:000 days 00:45:001.09not bunched
913762024-05-22 14:08:380 days 00:41:282024-05-22 14:12:000 days 00:45:000.92not bunched
913772024-05-22 14:58:220 days 00:49:442024-05-22 14:57:000 days 00:45:001.11not bunched
913782024-05-22 16:26:530 days 01:28:312024-05-22 16:27:000 days 01:30:000.98not bunched
913792024-05-22 17:08:470 days 00:41:542024-05-22 15:42:00-1 days +23:15:00-0.93bunched
913802024-05-22 17:57:230 days 00:48:362024-05-22 17:57:000 days 02:15:000.36not bunched
913812024-05-22 18:41:370 days 00:44:142024-05-22 17:12:00-1 days +23:15:00-0.98bunched
913822024-05-22 19:24:430 days 00:43:062024-05-22 19:27:000 days 02:15:000.32not bunched
913832024-05-22 20:09:430 days 00:45:002024-05-22 18:42:00-1 days +23:15:00-1.00bunched
\n", + "
" + ], + "text/plain": [ + " converted_rt_arrival actual_arrival_lag converted_schd_arrival \\\n", + "91366 2024-05-22 06:32:47 NaT 2024-05-22 06:42:00 \n", + "91367 2024-05-22 07:26:41 0 days 00:53:54 2024-05-22 07:27:00 \n", + "91368 2024-05-22 08:06:11 0 days 00:39:30 2024-05-22 08:12:00 \n", + "91369 2024-05-22 08:56:57 0 days 00:50:46 2024-05-22 08:57:00 \n", + "91370 2024-05-22 09:37:59 0 days 00:41:02 2024-05-22 09:42:00 \n", + "91371 2024-05-22 10:27:26 0 days 00:49:27 2024-05-22 10:27:00 \n", + "91372 2024-05-22 11:10:05 0 days 00:42:39 2024-05-22 11:12:00 \n", + "91373 2024-05-22 12:01:01 0 days 00:50:56 2024-05-22 11:57:00 \n", + "91374 2024-05-22 12:38:08 0 days 00:37:07 2024-05-22 12:42:00 \n", + "91375 2024-05-22 13:27:10 0 days 00:49:02 2024-05-22 13:27:00 \n", + "91376 2024-05-22 14:08:38 0 days 00:41:28 2024-05-22 14:12:00 \n", + "91377 2024-05-22 14:58:22 0 days 00:49:44 2024-05-22 14:57:00 \n", + "91378 2024-05-22 16:26:53 0 days 01:28:31 2024-05-22 16:27:00 \n", + "91379 2024-05-22 17:08:47 0 days 00:41:54 2024-05-22 15:42:00 \n", + "91380 2024-05-22 17:57:23 0 days 00:48:36 2024-05-22 17:57:00 \n", + "91381 2024-05-22 18:41:37 0 days 00:44:14 2024-05-22 17:12:00 \n", + "91382 2024-05-22 19:24:43 0 days 00:43:06 2024-05-22 19:27:00 \n", + "91383 2024-05-22 20:09:43 0 days 00:45:00 2024-05-22 18:42:00 \n", + "\n", + " scheduled_arrival_lag pct_actual_schd_headway bunched_y_n \n", + "91366 NaT NaN not bunched \n", + "91367 0 days 00:45:00 1.20 not bunched \n", + "91368 0 days 00:45:00 0.88 not bunched \n", + "91369 0 days 00:45:00 1.13 not bunched \n", + "91370 0 days 00:45:00 0.91 not bunched \n", + "91371 0 days 00:45:00 1.10 not bunched \n", + "91372 0 days 00:45:00 0.95 not bunched \n", + "91373 0 days 00:45:00 1.13 not bunched \n", + "91374 0 days 00:45:00 0.82 not bunched \n", + "91375 0 days 00:45:00 1.09 not bunched \n", + "91376 0 days 00:45:00 0.92 not bunched \n", + "91377 0 days 00:45:00 1.11 not bunched \n", + "91378 0 days 01:30:00 
0.98 not bunched \n", + "91379 -1 days +23:15:00 -0.93 bunched \n", + "91380 0 days 02:15:00 0.36 not bunched \n", + "91381 -1 days +23:15:00 -0.98 bunched \n", + "91382 0 days 02:15:00 0.32 not bunched \n", + "91383 -1 days +23:15:00 -1.00 bunched " + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "example2[\n", + " [\n", + " \"converted_rt_arrival\",\n", + " \"actual_arrival_lag\",\n", + " \"converted_schd_arrival\",\n", + " \"scheduled_arrival_lag\",\n", + " \"pct_actual_schd_headway\",\n", + " \"bunched_y_n\",\n", + " ]\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "b0579e78-2a95-4d8b-9761-2824aa39a8eb", + "metadata": {}, + "source": [ + "### Use 2 minute benchmark\n", + "* [Source](https://static1.squarespace.com/static/533b9a24e4b01d79d0ae4376/t/645e82de1f570b31497c44dc/1683915486889/TransitMatters-Headwaymanagement.pdf)\n", + "* Justifying the use of\n", + "headway maintenance. For example, in April\n", + "2022 the 66 bus significantly bunched around\n", + "several stops. When bunching is defined as\n", + "buses that run within two minutes or less of\n", + "each other, inbound buses towards Nubian\n", + "Square bunched 10% of the time at Brigham\n", + "Circle, 9% at Brookline Village and Roxbury\n", + "Crossing, and 8% of the time at Coolidge\n", + "Corner. Bunching is even more dramatic\n", + "outbound towards Harvard Square where\n", + "buses bunched over 35% of the time at Winship\n", + "St, 13% at Coolidge Corner and Harvard Ave at\n", + "Commonwealth Ave, and 12% at North Harvard\n", + "St at Western Ave. 
View more data about bus\n", + "bunching through the TransitMatters Data\n", + "Dashboard here.\n", + "\n", + "* To Do: add back in route & operator information" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "id": "e0706e7e-0d56-43b2-bf3c-4205e9277c64", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'rt_stop_times5' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[114], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m two_minutess_df \u001b[38;5;241m=\u001b[39m \u001b[43mrt_stop_times5\u001b[49m\u001b[38;5;241m.\u001b[39mcopy()\n", + "\u001b[0;31mNameError\u001b[0m: name 'rt_stop_times5' is not defined" + ] + } + ], "source": [ "two_minutess_df = rt_stop_times5.copy()" ] From 10121b1ab4d8f141237cafa5cc82b74e52a20c87 Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Tue, 1 Oct 2024 19:34:54 +0000 Subject: [PATCH 2/6] transit bunching 2 min approach, began work on agency metrics in pipeline --- .../shared_utils/gtfs_analytics_data.yml | 2 + gtfs_digest/31_transit_bunching_v2.ipynb | 3200 +++++++++++------ rt_scheduled_v_ran/11_agency_agg.ipynb | 715 ++++ .../scripts/rt_v_scheduled_agency.py | 68 + .../scripts/rt_v_scheduled_routes.py | 1 + rt_scheduled_v_ran/scripts/update_vars.py | 4 +- 6 files changed, 2826 insertions(+), 1164 deletions(-) create mode 100644 rt_scheduled_v_ran/11_agency_agg.ipynb create mode 100644 rt_scheduled_v_ran/scripts/rt_v_scheduled_agency.py diff --git a/_shared_utils/shared_utils/gtfs_analytics_data.yml b/_shared_utils/shared_utils/gtfs_analytics_data.yml index b31c541d8..84df48c27 100644 --- a/_shared_utils/shared_utils/gtfs_analytics_data.yml +++ b/_shared_utils/shared_utils/gtfs_analytics_data.yml @@ -52,10 +52,12 @@ rt_vs_schedule_tables: sched_route_direction_metrics: 
"schedule_route_dir/schedule_route_direction_metrics" vp_trip_metrics: "vp_trip/trip_metrics" vp_route_direction_metrics: "vp_route_dir/route_direction_metrics" + vp_agency_metrics: "vp_agency/agency_metrics" schedule_rt_stop_times: "schedule_rt_stop_times" early_trip_minutes: -5 late_trip_minutes: 5 + digest_tables: dir: ${gcs_paths.RT_SCHED_GCS} route_schedule_vp: "digest/schedule_vp_metrics" diff --git a/gtfs_digest/31_transit_bunching_v2.ipynb b/gtfs_digest/31_transit_bunching_v2.ipynb index 5c299b8c5..5560b3587 100644 --- a/gtfs_digest/31_transit_bunching_v2.ipynb +++ b/gtfs_digest/31_transit_bunching_v2.ipynb @@ -624,6 +624,27 @@ "trips_freq_routes.shape" ] }, + { + "cell_type": "code", + "execution_count": 27, + "id": "9c1ba23b-30df-4916-a522-eb70bd5afdb9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "158" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trips_freq_routes.route_id.nunique()" + ] + }, { "cell_type": "markdown", "id": "57ae24e1-c656-4482-b228-31637f245542", @@ -637,7 +658,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "id": "d9688e03-4b61-4736-b9d5-3539b0de80b2", "metadata": {}, "outputs": [], @@ -662,7 +683,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "id": "8dac05a6-0ba9-472b-85a2-5a0081550efb", "metadata": {}, "outputs": [], @@ -672,7 +693,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "id": "cd13aa3e-f222-49f8-b923-1e9e901f7bfb", "metadata": {}, "outputs": [], @@ -685,7 +706,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "id": "ae72b7fc-ec7b-4dcd-8553-ac2abce5da1d", "metadata": {}, "outputs": [], @@ -695,7 +716,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "id": "01a26feb-8d31-4308-9868-47840a7f6591", "metadata": {}, "outputs": [ @@ -709,7 +730,7 @@ "Name: route_type_str, dtype: 
int64" ] }, - "execution_count": 31, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -718,6 +739,27 @@ "high_frequency_routes2.route_type_str.value_counts()" ] }, + { + "cell_type": "code", + "execution_count": 33, + "id": "65c264d6-7e7c-41b6-bc4f-ab19c32c246e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "158" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_frequency_routes2.route_id.nunique()" + ] + }, { "cell_type": "markdown", "id": "e3be8778-84ff-479f-a3b2-178e374da5f2", @@ -728,7 +770,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 34, "id": "3f6727c6-205f-4cc8-8a68-42e8dec6e4b3", "metadata": {}, "outputs": [], @@ -740,7 +782,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 35, "id": "12c414e2-3cc3-48f3-b5e3-de174c29fa9d", "metadata": {}, "outputs": [ @@ -754,7 +796,7 @@ "dtype: int64" ] }, - "execution_count": 33, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -775,7 +817,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 36, "id": "049a2833-f132-431a-8f44-92f31cd11d8a", "metadata": {}, "outputs": [], @@ -794,7 +836,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 37, "id": "d1abbbd0-0cee-4f03-a4bb-497209a62498", "metadata": {}, "outputs": [ @@ -804,7 +846,7 @@ "(118214, 20)" ] }, - "execution_count": 35, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -815,7 +857,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 38, "id": "6464ea15-574a-4976-a41f-26440f7ca1ed", "metadata": {}, "outputs": [ @@ -825,7 +867,7 @@ "(3059, 3059)" ] }, - "execution_count": 36, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -834,6 +876,27 @@ "rt_stop_times2.trip_id.nunique(), rt_stop_times2.trip_instance_key.nunique()" ] }, + { + 
"cell_type": "code", + "execution_count": 39, + "id": "7e7af3e8-16c5-4fca-8c3d-ce0d110bc7bd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "116" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rt_stop_times2.route_id.nunique()" + ] + }, { "cell_type": "markdown", "id": "c256e4ba-a42c-4bb6-ba65-2f79475b2ac4", @@ -844,7 +907,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 40, "id": "50bd294b-d96b-4dd8-9890-cb91cd21852c", "metadata": {}, "outputs": [ @@ -854,7 +917,7 @@ "0.9870827482362495" ] }, - "execution_count": 37, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -867,7 +930,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 41, "id": "7c59d770-d379-422e-a23d-9140c23df375", "metadata": {}, "outputs": [ @@ -877,7 +940,7 @@ "0.012917251763750486" ] }, - "execution_count": 38, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -888,7 +951,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 42, "id": "bfb52e77-27c2-41d9-ab83-de76497577da", "metadata": {}, "outputs": [], @@ -899,7 +962,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 43, "id": "f6bf9598-60b9-4169-9f71-53be00cef8b8", "metadata": {}, "outputs": [], @@ -911,7 +974,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 44, "id": "70477a11-34b4-45a7-9b1f-35dfd2d68231", "metadata": {}, "outputs": [ @@ -921,7 +984,7 @@ "116687" ] }, - "execution_count": 41, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -932,7 +995,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 45, "id": "ca2e1613-2d2c-40ba-8518-2e3297c13ec8", "metadata": {}, "outputs": [ @@ -942,7 +1005,7 @@ "118214" ] }, - "execution_count": 42, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ 
-961,7 +1024,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 46, "id": "0ef36e93-79ed-4f86-b16a-9d28d90aea1a", "metadata": {}, "outputs": [], @@ -996,7 +1059,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 47, "id": "c93dffd6-6fe7-43f0-85fb-061b20d29a74", "metadata": {}, "outputs": [ @@ -1014,7 +1077,7 @@ "Name: scheduled_arrival_sec, dtype: float64" ] }, - "execution_count": 44, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } @@ -1025,7 +1088,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 48, "id": "97e3dc65-45fc-4318-a5f6-b0f497a6ab04", "metadata": {}, "outputs": [ @@ -1035,7 +1098,7 @@ "(16, 21)" ] }, - "execution_count": 45, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } @@ -1046,7 +1109,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 49, "id": "15a58951-9ee0-4696-9065-b768737a582d", "metadata": {}, "outputs": [ @@ -1056,7 +1119,7 @@ "(2583, 21)" ] }, - "execution_count": 46, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -1065,30 +1128,9 @@ "rt_stop_times3.loc[rt_stop_times3[\"scheduled_arrival_sec\"] > 86_400].shape" ] }, - { - "cell_type": "markdown", - "id": "641ca6ae-b5e4-4871-95de-fd29447f831a", - "metadata": {}, - "source": [ - "#### Test `scheduled_arrival_sec` rows ` rows that exceed 86,400 seconds" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "546d1250-aad7-40b4-8349-7189da606cc2", - "metadata": {}, - "outputs": [], - "source": [ - "# Convert to midnight anything that goes past the service date\n", - "# rt_stop_times3 = convert_to_midnight(\n", - "# rt_stop_times3, \"scheduled_arrival_sec\", may_date\n", - "# )" - ] - }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 50, "id": "099c6838-aa42-4eb5-a250-0ac3e054f834", "metadata": {}, "outputs": [], @@ -1104,28 +1146,7 @@ }, { "cell_type": "code", - 
"execution_count": 49, - "id": "f74e40ac-5a09-4ae2-8ac2-1f1269957330", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "82800" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "86_400 - 3600" - ] - }, - { - "cell_type": "code", - "execution_count": 50, + "execution_count": 51, "id": "d64637d8-ca0d-4e3c-b956-13d8e64e513c", "metadata": {}, "outputs": [], @@ -1154,143 +1175,6 @@ " return pd.Timestamp(date) + pd.Timedelta(seconds=seconds)" ] }, - { - "cell_type": "code", - "execution_count": 51, - "id": "7136cbd5-d0f8-47ca-873e-ee00b6c7a207", - "metadata": {}, - "outputs": [], - "source": [ - "more_than_86400 = rt_stop_times3.loc[\n", - " rt_stop_times3[\"scheduled_arrival_sec\"] > 86_400\n", - "].reset_index(drop=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "4b07d362-636f-49ed-983f-cb348660034c", - "metadata": {}, - "outputs": [], - "source": [ - "more_than_86400[\"converted_schd_arrival\"] = more_than_86400.apply(\n", - " lambda row: adjust_days_and_time(\n", - " row[\"scheduled_arrival_sec\"], row[\"service_date\"], row[\"rt_arrival_sec\"]\n", - " ),\n", - " axis=1,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "5db197c8-e234-4427-8fdd-2a204d258040", - "metadata": {}, - "outputs": [], - "source": [ - "more_than_86400[\"converted_rt_arrival\"] = pd.to_datetime(\n", - " more_than_86400[\"service_date\"]\n", - ") + pd.to_timedelta(more_than_86400[\"rt_arrival_sec\"] % 86400, unit=\"s\")" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "394eb491-9f3e-4c14-88d9-963925326dba", - "metadata": {}, - "outputs": [], - "source": [ - "more_than_86400[\"delay_min\"] = (\n", - " more_than_86400[\"converted_rt_arrival\"] - more_than_86400[\"converted_schd_arrival\"]\n", - ").dt.total_seconds() / 60" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "0dc35f0f-2887-415a-80fd-1ef8fd4644dc", - 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "count 2583.00\n", - "mean 101.87\n", - "std 365.42\n", - "min -18.18\n", - "1% -5.47\n", - "2% -3.25\n", - "5% -1.92\n", - "10% -1.20\n", - "50% 1.75\n", - "90% 9.60\n", - "95% 1440.26\n", - "98% 1445.47\n", - "99% 1448.25\n", - "max 1466.70\n", - "Name: delay_min, dtype: float64\n" - ] - } - ], - "source": [ - "print(\n", - " more_than_86400.delay_min.describe(\n", - " percentiles=[0.01, 0.02, 0.05, 0.1, 0.9, 0.95, 0.98, 0.99]\n", - " )\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "f397e6fd-daa9-41ba-9649-633d377088a0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "count 2583.00\n", - "mean 7041.60\n", - "std 5789.22\n", - "min 9.00\n", - "25% 2556.50\n", - "50% 5874.00\n", - "75% 10739.00\n", - "max 86389.00\n", - "Name: rt_arrival_sec, dtype: float64" - ] - }, - "execution_count": 56, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "more_than_86400.rt_arrival_sec.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "ac956fa6-31a8-47c2-a451-ada54847aecb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(2583, 24)" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "more_than_86400.shape" - ] - }, { "cell_type": "markdown", "id": "ca60b033-1f55-45a2-ba8a-bd493d99a95f", @@ -1301,7 +1185,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 52, "id": "41f6aded-7f9c-4f5e-ba7a-2a52769a3e44", "metadata": {}, "outputs": [], @@ -1316,7 +1200,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 53, "id": "3ce579d1-0392-41a6-b535-9f4422a78216", "metadata": {}, "outputs": [], @@ -1328,7 +1212,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 54, "id": "838af336-3592-40d8-be62-5debad7ab0c1", "metadata": {}, 
"outputs": [], @@ -1340,7 +1224,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 55, "id": "984f27fd-5742-4a76-9467-4a518824f6e8", "metadata": {}, "outputs": [], @@ -1361,7 +1245,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 56, "id": "003abe45-5d55-4839-80f8-83e693214427", "metadata": {}, "outputs": [], @@ -1371,7 +1255,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 57, "id": "8325eed2-412b-4202-9d57-252db2fd7e26", "metadata": {}, "outputs": [], @@ -1381,7 +1265,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 58, "id": "a16335dc-47a0-4ea1-9612-fc6d5dfea1d6", "metadata": {}, "outputs": [ @@ -1413,7 +1297,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 59, "id": "0daa5a88-f43f-40c9-9032-e7409125e9da", "metadata": {}, "outputs": [], @@ -1423,7 +1307,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 60, "id": "8ec23dec-abe6-4c95-9523-a4a6e7c8d1ad", "metadata": {}, "outputs": [], @@ -1433,7 +1317,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 61, "id": "4a87e0b4-ef25-45ad-9ec7-268787d6bb86", "metadata": {}, "outputs": [], @@ -1443,7 +1327,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 62, "id": "8cb4f43f-7b7a-49f3-b3e6-5162df5a99db", "metadata": {}, "outputs": [ @@ -1453,7 +1337,7 @@ "1165" ] }, - "execution_count": 68, + "execution_count": 62, "metadata": {}, "output_type": "execute_result" } @@ -1464,7 +1348,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 63, "id": "1bab6d68-6cb3-44b5-9f08-6e9d82cc9aba", "metadata": {}, "outputs": [], @@ -1474,7 +1358,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 64, "id": "0bbbd0cf-2e53-46da-ba52-0ba0a8614fc9", "metadata": {}, "outputs": [ @@ -1498,7 +1382,7 @@ "Name: delay_min, dtype: float64" ] }, - "execution_count": 70, + "execution_count": 
64, "metadata": {}, "output_type": "execute_result" } @@ -1509,7 +1393,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 65, "id": "5a991b19-685a-4e8a-89dc-748fc5d9941b", "metadata": {}, "outputs": [ @@ -1519,7 +1403,7 @@ "1167" ] }, - "execution_count": 71, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } @@ -1530,7 +1414,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 66, "id": "dfc4d1fe-5720-4ba0-8cc4-29dd96175287", "metadata": {}, "outputs": [ @@ -1554,7 +1438,7 @@ "Name: delay_min, dtype: float64" ] }, - "execution_count": 72, + "execution_count": 66, "metadata": {}, "output_type": "execute_result" } @@ -1565,7 +1449,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 67, "id": "332bdda3-d690-441c-a299-c9c5f8be7709", "metadata": {}, "outputs": [], @@ -1575,7 +1459,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 68, "id": "2bc0f183-294a-4617-bc17-aa73a570f6e0", "metadata": {}, "outputs": [ @@ -1585,7 +1469,7 @@ "-52.626666666666665" ] }, - "execution_count": 74, + "execution_count": 68, "metadata": {}, "output_type": "execute_result" } @@ -1596,7 +1480,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 69, "id": "4440bc59-2335-4ef6-abbc-55d334e4bf7d", "metadata": {}, "outputs": [], @@ -1606,7 +1490,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 70, "id": "5d231749-31b5-4899-b36b-4ea5fb55981c", "metadata": {}, "outputs": [ @@ -1616,7 +1500,7 @@ "274.9466666666675" ] }, - "execution_count": 76, + "execution_count": 70, "metadata": {}, "output_type": "execute_result" } @@ -1643,7 +1527,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 71, "id": "bbd1cbc3-9762-44b8-a564-c33e57879d10", "metadata": {}, "outputs": [], @@ -1657,7 +1541,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 72, "id": 
"7668b5b5-4f1a-4fc7-9fac-b3a113c2b0b2", "metadata": {}, "outputs": [ @@ -1667,7 +1551,7 @@ "-234" ] }, - "execution_count": 78, + "execution_count": 72, "metadata": {}, "output_type": "execute_result" } @@ -1678,7 +1562,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 73, "id": "da609102-30a4-4de3-b519-297822c450e6", "metadata": {}, "outputs": [ @@ -1702,7 +1586,7 @@ "Name: delay_min, dtype: float64" ] }, - "execution_count": 79, + "execution_count": 73, "metadata": {}, "output_type": "execute_result" } @@ -1721,7 +1605,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 74, "id": "b9171520-7358-4f22-9d9c-c5156e710f1b", "metadata": {}, "outputs": [], @@ -1739,35 +1623,7 @@ }, { "cell_type": "code", - "execution_count": 81, - "id": "f7d3055e-c079-4232-9123-2f9b2d3e07c8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['trip_id', 'stop_id', 'stop_sequence', 'scheduled_arrival_sec',\n", - " 'schedule_gtfs_dataset_key', 'trip_instance_key', 'rt_arrival_sec',\n", - " 'route_id', 'shape_array_key', 'feed_key', 'route_long_name',\n", - " 'direction_id', 'route_type', 'route_primary_direction',\n", - " 'med_headway_minutes', 'organization_name', 'name', 'caltrans_district',\n", - " 'service_date', 'route_type_str', 'scheduled_arrival_sec_copy',\n", - " 'converted_schd_arrival', 'converted_rt_arrival', 'delay_min'],\n", - " dtype='object')" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rt_stop_times4.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 82, + "execution_count": 75, "id": "f5b62023-ab77-4634-a526-3d822bb7a63f", "metadata": {}, "outputs": [], @@ -1781,7 +1637,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 76, "id": "6328c5fb-205e-4a78-a3d3-62fafa88a4cd", "metadata": {}, "outputs": [], @@ -1793,7 +1649,7 @@ }, { "cell_type": "code", - "execution_count": 84, + 
"execution_count": 77, "id": "1f3969a8-1850-4b74-96e7-edcaf94e25cd", "metadata": {}, "outputs": [ @@ -1925,7 +1781,7 @@ "9 0 days 00:37:00 " ] }, - "execution_count": 84, + "execution_count": 77, "metadata": {}, "output_type": "execute_result" } @@ -1952,56 +1808,6 @@ "* Katrina: I would fill in the actual/schedule headway columns with 0 rather than dropping the first row in each grouping. I wonder if it makes sense to use a more descriptive column name than headway, such as \"minutes since last vehicle\"" ] }, - { - "cell_type": "code", - "execution_count": 85, - "id": "6603e96d-085d-460f-965f-ba01a10f6dbc", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 116453 entries, 0 to 116452\n", - "Data columns (total 26 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 trip_id 116453 non-null object \n", - " 1 stop_id 116453 non-null object \n", - " 2 stop_sequence 116453 non-null int64 \n", - " 3 scheduled_arrival_sec 116453 non-null float64 \n", - " 4 schedule_gtfs_dataset_key 116453 non-null object \n", - " 5 trip_instance_key 116453 non-null object \n", - " 6 rt_arrival_sec 116453 non-null int64 \n", - " 7 route_id 116453 non-null object \n", - " 8 shape_array_key 116453 non-null object \n", - " 9 feed_key 116453 non-null object \n", - " 10 route_long_name 116453 non-null object \n", - " 11 direction_id 116453 non-null float64 \n", - " 12 route_type 116453 non-null object \n", - " 13 route_primary_direction 116453 non-null object \n", - " 14 med_headway_minutes 116453 non-null float64 \n", - " 15 organization_name 116453 non-null object \n", - " 16 name 116453 non-null object \n", - " 17 caltrans_district 116453 non-null object \n", - " 18 service_date 116453 non-null datetime64[ns] \n", - " 19 route_type_str 116453 non-null object \n", - " 20 scheduled_arrival_sec_copy 116453 non-null float64 \n", - " 21 converted_schd_arrival 116453 non-null 
datetime64[ns] \n", - " 22 converted_rt_arrival 116453 non-null datetime64[ns] \n", - " 23 delay_min 116453 non-null float64 \n", - " 24 actual_arrival_lag 110343 non-null timedelta64[ns]\n", - " 25 scheduled_arrival_lag 110343 non-null timedelta64[ns]\n", - "dtypes: datetime64[ns](3), float64(5), int64(2), object(14), timedelta64[ns](2)\n", - "memory usage: 23.1+ MB\n" - ] - } - ], - "source": [ - "rt_stop_times4.info()" - ] - }, { "cell_type": "markdown", "id": "28362518-a54b-4f5d-a4d7-24a3d8ddefd0", @@ -2012,7 +1818,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 78, "id": "f0f0f4b7-fa64-4b01-a141-5dd78c59693b", "metadata": {}, "outputs": [], @@ -2022,7 +1828,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 79, "id": "7bea63e5-45d0-4d06-8c1c-fd34a69ffde7", "metadata": {}, "outputs": [], @@ -2034,7 +1840,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 80, "id": "c67c6299-68f0-414f-a9c1-e0b27511b9e5", "metadata": {}, "outputs": [], @@ -2056,7 +1862,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 81, "id": "70bc7bfe-2306-498a-b0d7-e012d266d5fc", "metadata": {}, "outputs": [ @@ -2074,7 +1880,7 @@ "Name: pct_actual_schd_headway, dtype: float64" ] }, - "execution_count": 89, + "execution_count": 81, "metadata": {}, "output_type": "execute_result" } @@ -2085,7 +1891,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 82, "id": "d7d56ce7-66e9-4084-a725-a9eff7c4c5b2", "metadata": {}, "outputs": [ @@ -2097,7 +1903,7 @@ "Name: bunched_y_n, dtype: float64" ] }, - "execution_count": 90, + "execution_count": 82, "metadata": {}, "output_type": "execute_result" } @@ -2108,7 +1914,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 83, "id": "ee874ad7-e2c5-4a4a-8e79-ac79d9d49f30", "metadata": {}, "outputs": [], @@ -2126,7 +1932,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 84, "id": 
"aef09625-a050-463b-8a60-f15dc7839520", "metadata": {}, "outputs": [ @@ -2134,7 +1940,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_2599/3773661829.py:1: SettingWithCopyWarning: \n", + "/tmp/ipykernel_381/3773661829.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -2149,7 +1955,7 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 85, "id": "6658bb0f-0b3f-4dc2-914c-b144f785d608", "metadata": {}, "outputs": [ @@ -2157,7 +1963,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_2599/579629931.py:1: SettingWithCopyWarning: \n", + "/tmp/ipykernel_381/579629931.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -2172,7 +1978,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 86, "id": "2a301a59-7357-43a0-8f5c-3ba31b889878", "metadata": {}, "outputs": [ @@ -2182,7 +1988,7 @@ "(106, 30)" ] }, - "execution_count": 94, + "execution_count": 86, "metadata": {}, "output_type": "execute_result" } @@ -2193,7 +1999,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 87, "id": "dc43f71d-8f7b-4536-9d15-14a11703fa3d", "metadata": { "scrolled": true, @@ -3406,7 +3212,7 @@ "3263 0 days 00:30:00 0.85 not bunched " ] }, - "execution_count": 95, + "execution_count": 87, "metadata": {}, "output_type": "execute_result" } @@ -3435,7 +3241,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 88, "id": "6f94980e-b9c4-4c1e-a497-ea4b3f88c55b", "metadata": {}, "outputs": [], @@ -3455,7 +3261,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 89, "id": "e72f4e50-da82-4a59-8aae-fc6b79f91cc5", "metadata": {}, "outputs": [], @@ -3464,25 +3270,27 @@ " 
transit_matters_df1.groupby(transit_matters_agg)\n", " .agg({\"trip_instance_key\": \"nunique\"})\n", " .reset_index()\n", - ").rename(columns = {\"trip_instance_key\":\"all_trips\"} )" + ").rename(columns={\"trip_instance_key\": \"all_trips\"})" ] }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 90, "id": "ebf0e403-89c5-4b2c-b962-44affe3c0b42", "metadata": {}, "outputs": [], "source": [ "# Filter out only rows that are bunched.\n", - "bunched_only = transit_matters_df2.loc[\n", - " transit_matters_df2.bunched_y_n == \"bunched\"\n", - "].reset_index(drop=True).drop(columns = [\"bunched_y_n\"])" + "bunched_only = (\n", + " transit_matters_df2.loc[transit_matters_df2.bunched_y_n == \"bunched\"]\n", + " .reset_index(drop=True)\n", + " .drop(columns=[\"bunched_y_n\"])\n", + ")" ] }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 91, "id": "22eb6ee2-e1ac-4b31-8363-42003d03538d", "metadata": {}, "outputs": [ @@ -3492,7 +3300,7 @@ "730" ] }, - "execution_count": 99, + "execution_count": 91, "metadata": {}, "output_type": "execute_result" } @@ -3503,7 +3311,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 92, "id": "7591762b-abab-4083-8b42-53dca0523fca", "metadata": {}, "outputs": [], @@ -3513,7 +3321,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 93, "id": "42172962-6346-4bb1-bcbf-4101468dce4a", "metadata": {}, "outputs": [ @@ -3596,7 +3404,7 @@ "1 Westbound 1 " ] }, - "execution_count": 101, + "execution_count": 93, "metadata": {}, "output_type": "execute_result" } @@ -3607,17 +3415,17 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 94, "id": "a4a6e1c8-5ac4-47f2-996e-2c0ed2f13c80", "metadata": {}, "outputs": [], "source": [ - "transit_matters_df2 = transit_matters_df2.drop(columns = [\"bunched_y_n\"])" + "transit_matters_df2 = transit_matters_df2.drop(columns=[\"bunched_y_n\"])" ] }, { "cell_type": "code", - "execution_count": 103, + 
"execution_count": 95, "id": "ed446323-884f-46ba-b546-8d59a5c7a6f9", "metadata": {}, "outputs": [ @@ -3696,7 +3504,7 @@ "1 11 " ] }, - "execution_count": 103, + "execution_count": 95, "metadata": {}, "output_type": "execute_result" } @@ -3707,7 +3515,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 96, "id": "77483529-0602-4614-b70f-6c19a13f00f6", "metadata": {}, "outputs": [], @@ -3717,8 +3525,8 @@ }, { "cell_type": "code", - "execution_count": 105, - "id": "a949add8-c4ab-4e09-b7e1-e53e6be07bf1", + "execution_count": 97, + "id": "1395b083-23f4-4f9c-b50e-beb79da7ee39", "metadata": {}, "outputs": [ { @@ -3734,7 +3542,7 @@ " 'route_primary_direction']" ] }, - "execution_count": 105, + "execution_count": 97, "metadata": {}, "output_type": "execute_result" } @@ -3745,7 +3553,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 98, "id": "2f57e136-fc83-4eac-bc05-84f530e2f4b0", "metadata": {}, "outputs": [], @@ -3756,9 +3564,17 @@ ")" ] }, + { + "cell_type": "markdown", + "id": "fe6513fc-306c-4bef-bd65-601d2ffea3cc", + "metadata": {}, + "source": [ + "#### Fix merges, why are the rows duplicate? 
" + ] + }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 99, "id": "6f4c4386-acfb-483a-947d-0279710df61b", "metadata": { "scrolled": true, @@ -3847,7 +3663,7 @@ "4132 204-13172 5637 Northbound 98 8.00 " ] }, - "execution_count": 107, + "execution_count": 99, "metadata": {}, "output_type": "execute_result" } @@ -3865,17 +3681,51 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 100, "id": "5a4c017b-7a0f-4c2c-8999-dad006c8cab3", "metadata": {}, "outputs": [], "source": [ - "transit_matters_m2 = transit_matters_m1.sort_values(by = [\"all_trips\"], ascending = False).drop_duplicates(subset = transit_matters_agg).reset_index(drop = True)" + "transit_matters_m2 = (\n", + " transit_matters_m1.sort_values(by=[\"all_trips\"], ascending=False)\n", + " .drop_duplicates(subset=transit_matters_agg)\n", + " .reset_index(drop=True)\n", + ")" ] }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 102, + "id": "55b8feb0-1e0a-4717-b966-ee360becfde5", + "metadata": {}, + "outputs": [], + "source": [ + "transit_matters_m2[\"pct_trips_bunched\"] = (\n", + " transit_matters_m2.bunched_trips / transit_matters_m2.all_trips * 100\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "4a25225a-7883-457f-a27a-f606164cdd37", + "metadata": {}, + "outputs": [], + "source": [ + "transit_matters_m2.pct_trips_bunched = transit_matters_m2.pct_trips_bunched.fillna(0)" + ] + }, + { + "cell_type": "markdown", + "id": "a4401e8d-8dbd-4874-93e4-cfa05e711f62", + "metadata": {}, + "source": [ + "#### Do we include bunched trips in which only one trip is ran and they are all bunched?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 136, "id": "d383e8e6-2ad1-452d-9b4c-ff5300cad764", "metadata": {}, "outputs": [ @@ -3910,6 +3760,7 @@ " route_primary_direction\n", " all_trips\n", " bunched_trips\n", + " pct_trips_bunched\n", " \n", " \n", " \n", @@ -3925,6 +3776,7 @@ " Northbound\n", " 98\n", " 8.00\n", + " 8.16\n", " \n", " \n", "\n", @@ -3940,11 +3792,14 @@ " route_long_name route_type_str shape_array_key \\\n", "71 Metro Local Line Bus 6a10ede3fa469c8b4d9bf761946ed20a \n", "\n", - " route_id stop_id route_primary_direction all_trips bunched_trips \n", - "71 204-13172 5637 Northbound 98 8.00 " + " route_id stop_id route_primary_direction all_trips bunched_trips \\\n", + "71 204-13172 5637 Northbound 98 8.00 \n", + "\n", + " pct_trips_bunched \n", + "71 8.16 " ] }, - "execution_count": 109, + "execution_count": 136, "metadata": {}, "output_type": "execute_result" } @@ -3962,29 +3817,7 @@ }, { "cell_type": "code", - "execution_count": 110, - "id": "55b8feb0-1e0a-4717-b966-ee360becfde5", - "metadata": {}, - "outputs": [], - "source": [ - "transit_matters_m2[\"pct_trips_bunched\"] = (\n", - " transit_matters_m2.bunched_trips / transit_matters_m2.all_trips * 100\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "id": "4a25225a-7883-457f-a27a-f606164cdd37", - "metadata": {}, - "outputs": [], - "source": [ - "transit_matters_m2.pct_trips_bunched = transit_matters_m2.pct_trips_bunched.fillna(0)" - ] - }, - { - "cell_type": "code", - "execution_count": 112, + "execution_count": 104, "id": "2e2abda3-b3de-4f03-baf4-7f764dd10255", "metadata": {}, "outputs": [ @@ -4008,7 +3841,7 @@ "Name: pct_trips_bunched, dtype: float64" ] }, - "execution_count": 112, + "execution_count": 104, "metadata": {}, "output_type": "execute_result" } @@ -4019,7 +3852,78 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 105, + "id": "61d8f755-052f-44f9-a94b-33c05f96688b", + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "5871" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(transit_matters_m2)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "ec5634a7-f0ef-46de-ad34-21f1c86b9e96", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "113" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transit_matters_m2.route_id.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "08fcb84e-29cf-44f9-b368-e2f20b2d0c78", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "113" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rt_stop_times4.route_id.nunique()" + ] + }, + { + "cell_type": "markdown", + "id": "b81c12f0-48cb-4382-8ddf-433c3c48114f", + "metadata": {}, + "source": [ + "#### City of Visalia \t has a lot of bunched trips." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 108, "id": "086eace2-81df-46fa-9bd4-c6aa5ea3db79", "metadata": { "scrolled": true, @@ -4481,286 +4385,6 @@ " 3.00\n", " 20.00\n", " \n", - " \n", - " 2621\n", - " 06 - Fresno\n", - " City of Visalia\n", - " Route 9\n", - " Bus\n", - " 60da59c7000ea5dcb5f845d8fa227f14\n", - " 2042\n", - " 2307720\n", - " Westbound\n", - " 15\n", - " 3.00\n", - " 20.00\n", - " \n", - " \n", - " 2620\n", - " 06 - Fresno\n", - " City of Visalia\n", - " Route 9\n", - " Bus\n", - " 60da59c7000ea5dcb5f845d8fa227f14\n", - " 2042\n", - " 3869709\n", - " Westbound\n", - " 15\n", - " 3.00\n", - " 20.00\n", - " \n", - " \n", - " 2619\n", - " 06 - Fresno\n", - " City of Visalia\n", - " Route 9\n", - " Bus\n", - " 60da59c7000ea5dcb5f845d8fa227f14\n", - " 2042\n", - " 3869710\n", - " Westbound\n", - " 15\n", - " 3.00\n", - " 20.00\n", - " \n", - " \n", - " 2626\n", - " 06 - Fresno\n", - " City of Visalia\n", - " Route 9\n", - " Bus\n", - " 60da59c7000ea5dcb5f845d8fa227f14\n", - " 2042\n", - " 2307700\n", - " Westbound\n", - " 15\n", - " 3.00\n", - " 20.00\n", - " \n", - " \n", - " 4143\n", - " 03 - Marysville\n", - " Yolo County Transportation District\n", - " ROUTE 215 WB\n", - " Bus\n", - " e939d633652e2af6d3aa82d28a042dbf\n", - " 07959480-2a40-4a51-92ac-8ca2029d5f4f\n", - " a1d203a6-cfdd-40a1-af2f-0fa502ea65b4\n", - " Westbound\n", - " 6\n", - " 1.00\n", - " 16.67\n", - " \n", - " \n", - " 269\n", - " 07 - Los Angeles\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " Metro Local Line\n", - " Bus\n", - " 4e1e59e768bfbf8c30d0369f89e27aa6\n", - " 603-13172\n", - " 8066\n", - " Northbound\n", - " 67\n", - " 10.00\n", - " 14.93\n", - " \n", - " \n", - " 267\n", - " 07 - Los Angeles\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " Metro Local Line\n", - " Bus\n", - " 4e1e59e768bfbf8c30d0369f89e27aa6\n", - " 603-13172\n", - " 3362\n", - " Northbound\n", - " 67\n", - " 10.00\n", - " 14.93\n", - " 
\n", - " \n", - " 3866\n", - " 05 - San Luis Obispo\n", - " Santa Cruz Metropolitan Transit District\n", - " River/Harvey West/Emeline\n", - " Bus\n", - " 56f57a645c3aff1c35f50d5ea1385a75\n", - " 4\n", - " 1214\n", - " Eastbound\n", - " 7\n", - " 1.00\n", - " 14.29\n", - " \n", - " \n", - " 3867\n", - " 05 - San Luis Obispo\n", - " Santa Cruz Metropolitan Transit District\n", - " River/Harvey West/Emeline\n", - " Bus\n", - " 56f57a645c3aff1c35f50d5ea1385a75\n", - " 4\n", - " 1215\n", - " Eastbound\n", - " 7\n", - " 1.00\n", - " 14.29\n", - " \n", - " \n", - " 99\n", - " 07 - Los Angeles\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " Metro Local Line\n", - " Bus\n", - " 6a10ede3fa469c8b4d9bf761946ed20a\n", - " 204-13172\n", - " 3963\n", - " Northbound\n", - " 92\n", - " 13.00\n", - " 14.13\n", - " \n", - " \n", - " 98\n", - " 07 - Los Angeles\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " Metro Local Line\n", - " Bus\n", - " 6a10ede3fa469c8b4d9bf761946ed20a\n", - " 204-13172\n", - " 7092\n", - " Northbound\n", - " 92\n", - " 13.00\n", - " 14.13\n", - " \n", - " \n", - " 726\n", - " 07 - Los Angeles\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " Metro Local Line\n", - " Bus\n", - " 2e0e2720bd3b6c1510ab780896264ae4\n", - " 206-13172\n", - " 14032\n", - " Southbound\n", - " 44\n", - " 6.00\n", - " 13.64\n", - " \n", - " \n", - " 716\n", - " 07 - Los Angeles\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " Metro Local Line\n", - " Bus\n", - " 2e0e2720bd3b6c1510ab780896264ae4\n", - " 206-13172\n", - " 14027\n", - " Southbound\n", - " 45\n", - " 6.00\n", - " 13.33\n", - " \n", - " \n", - " 97\n", - " 07 - Los Angeles\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " Metro Local Line\n", - " Bus\n", - " 6a10ede3fa469c8b4d9bf761946ed20a\n", - " 204-13172\n", - " 5652\n", - " Northbound\n", - " 93\n", - " 12.00\n", - " 12.90\n", - " \n", - " \n", - " 
3644\n", - " 05 - San Luis Obispo\n", - " Santa Cruz Metropolitan Transit District\n", - " UCSC/Capitola Mall/Live Oak\n", - " Bus\n", - " 9b4a79b5b21e2ca01ee032b1c39ca3c5\n", - " 3B\n", - " 1388\n", - " Eastbound\n", - " 8\n", - " 1.00\n", - " 12.50\n", - " \n", - " \n", - " 3625\n", - " 03 - Marysville\n", - " Yolo County Transportation District\n", - " ROUTE 215 WB\n", - " Bus\n", - " e939d633652e2af6d3aa82d28a042dbf\n", - " 07959480-2a40-4a51-92ac-8ca2029d5f4f\n", - " c8a9bfc8-7e84-483b-95bc-02a1494c3ae3\n", - " Westbound\n", - " 8\n", - " 1.00\n", - " 12.50\n", - " \n", - " \n", - " 268\n", - " 07 - Los Angeles\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " Metro Local Line\n", - " Bus\n", - " 4e1e59e768bfbf8c30d0369f89e27aa6\n", - " 603-13172\n", - " 8689\n", - " Northbound\n", - " 67\n", - " 8.00\n", - " 11.94\n", - " \n", - " \n", - " 576\n", - " 07 - Los Angeles\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " Metro Local Line\n", - " Bus\n", - " 2e0e2720bd3b6c1510ab780896264ae4\n", - " 206-13172\n", - " 14078\n", - " Southbound\n", - " 51\n", - " 6.00\n", - " 11.76\n", - " \n", - " \n", - " 262\n", - " 07 - Los Angeles\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " Metro Local Line\n", - " Bus\n", - " 4e1e59e768bfbf8c30d0369f89e27aa6\n", - " 603-13172\n", - " 2203\n", - " Northbound\n", - " 68\n", - " 8.00\n", - " 11.76\n", - " \n", - " \n", - " 592\n", - " 07 - Los Angeles\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " Metro Local Line\n", - " Bus\n", - " 2e0e2720bd3b6c1510ab780896264ae4\n", - " 206-13172\n", - " 15437\n", - " Southbound\n", - " 51\n", - " 6.00\n", - " 11.76\n", - " \n", " \n", "\n", "" @@ -4797,26 +4421,6 @@ "2622 06 - Fresno \n", "4284 05 - San Luis Obispo \n", "2585 06 - Fresno \n", - "2621 06 - Fresno \n", - "2620 06 - Fresno \n", - "2619 06 - Fresno \n", - "2626 06 - Fresno \n", - "4143 03 - Marysville \n", - "269 07 - Los Angeles 
\n", - "267 07 - Los Angeles \n", - "3866 05 - San Luis Obispo \n", - "3867 05 - San Luis Obispo \n", - "99 07 - Los Angeles \n", - "98 07 - Los Angeles \n", - "726 07 - Los Angeles \n", - "716 07 - Los Angeles \n", - "97 07 - Los Angeles \n", - "3644 05 - San Luis Obispo \n", - "3625 03 - Marysville \n", - "268 07 - Los Angeles \n", - "576 07 - Los Angeles \n", - "262 07 - Los Angeles \n", - "592 07 - Los Angeles \n", "\n", " organization_name \\\n", "5599 Peninsula Corridor Joint Powers Board \n", @@ -4849,258 +4453,149 @@ "2622 City of Visalia \n", "4284 Santa Cruz Metropolitan Transit District \n", "2585 City of Visalia \n", - "2621 City of Visalia \n", - "2620 City of Visalia \n", - "2619 City of Visalia \n", - "2626 City of Visalia \n", - "4143 Yolo County Transportation District \n", - "269 Los Angeles County Metropolitan Transportation Authority \n", - "267 Los Angeles County Metropolitan Transportation Authority \n", - "3866 Santa Cruz Metropolitan Transit District \n", - "3867 Santa Cruz Metropolitan Transit District \n", - "99 Los Angeles County Metropolitan Transportation Authority \n", - "98 Los Angeles County Metropolitan Transportation Authority \n", - "726 Los Angeles County Metropolitan Transportation Authority \n", - "716 Los Angeles County Metropolitan Transportation Authority \n", - "97 Los Angeles County Metropolitan Transportation Authority \n", - "3644 Santa Cruz Metropolitan Transit District \n", - "3625 Yolo County Transportation District \n", - "268 Los Angeles County Metropolitan Transportation Authority \n", - "576 Los Angeles County Metropolitan Transportation Authority \n", - "262 Los Angeles County Metropolitan Transportation Authority \n", - "592 Los Angeles County Metropolitan Transportation Authority \n", - "\n", - " route_long_name route_type_str \\\n", - "5599 Local Rail \n", - "5582 Local Rail \n", - "728 Hollis Bus \n", - "4362 Metro Local Line Bus \n", - "4356 Metro Local Line Bus \n", - "4337 Metro Local Line Bus \n", - "2911 
Route 9 Bus \n", - "2588 Route 9 Bus \n", - "2631 Route 9 Bus \n", - "2625 Route 9 Bus \n", - "2611 Route 9 Bus \n", - "2600 Route 9 Bus \n", - "2599 Route 9 Bus \n", - "2597 Route 9 Bus \n", - "2596 Route 9 Bus \n", - "2593 Route 9 Bus \n", - "2592 Route 9 Bus \n", - "2591 Route 9 Bus \n", - "2590 Route 9 Bus \n", - "2587 Route 9 Bus \n", - "2627 Route 9 Bus \n", - "2606 Route 9 Bus \n", - "2589 Route 9 Bus \n", - "2583 Route 9 Bus \n", - "2624 Route 9 Bus \n", - "2633 Route 9 Bus \n", - "2623 Route 9 Bus \n", - "2622 Route 9 Bus \n", - "4284 River/Harvey West/Emeline Bus \n", - "2585 Route 9 Bus \n", - "2621 Route 9 Bus \n", - "2620 Route 9 Bus \n", - "2619 Route 9 Bus \n", - "2626 Route 9 Bus \n", - "4143 ROUTE 215 WB Bus \n", - "269 Metro Local Line Bus \n", - "267 Metro Local Line Bus \n", - "3866 River/Harvey West/Emeline Bus \n", - "3867 River/Harvey West/Emeline Bus \n", - "99 Metro Local Line Bus \n", - "98 Metro Local Line Bus \n", - "726 Metro Local Line Bus \n", - "716 Metro Local Line Bus \n", - "97 Metro Local Line Bus \n", - "3644 UCSC/Capitola Mall/Live Oak Bus \n", - "3625 ROUTE 215 WB Bus \n", - "268 Metro Local Line Bus \n", - "576 Metro Local Line Bus \n", - "262 Metro Local Line Bus \n", - "592 Metro Local Line Bus \n", "\n", - " shape_array_key route_id \\\n", - "5599 8c4de04e7398d418c12cc1541651e951 L1 \n", - "5582 8c4de04e7398d418c12cc1541651e951 L1 \n", - "728 0628e405f93c2d0b5e3e68a7115857d4 Hollis \n", - "4362 0688a14c97a2ebfe90f5674c1262d741 217-13172 \n", - "4356 0688a14c97a2ebfe90f5674c1262d741 217-13172 \n", - "4337 0688a14c97a2ebfe90f5674c1262d741 217-13172 \n", - "2911 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2588 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2631 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2625 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2611 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2600 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2599 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2597 
60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2596 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2593 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2592 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2591 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2590 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2587 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2627 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2606 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2589 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2583 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2624 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2633 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2623 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2622 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "4284 56f57a645c3aff1c35f50d5ea1385a75 4 \n", - "2585 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2621 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2620 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2619 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "2626 60da59c7000ea5dcb5f845d8fa227f14 2042 \n", - "4143 e939d633652e2af6d3aa82d28a042dbf 07959480-2a40-4a51-92ac-8ca2029d5f4f \n", - "269 4e1e59e768bfbf8c30d0369f89e27aa6 603-13172 \n", - "267 4e1e59e768bfbf8c30d0369f89e27aa6 603-13172 \n", - "3866 56f57a645c3aff1c35f50d5ea1385a75 4 \n", - "3867 56f57a645c3aff1c35f50d5ea1385a75 4 \n", - "99 6a10ede3fa469c8b4d9bf761946ed20a 204-13172 \n", - "98 6a10ede3fa469c8b4d9bf761946ed20a 204-13172 \n", - "726 2e0e2720bd3b6c1510ab780896264ae4 206-13172 \n", - "716 2e0e2720bd3b6c1510ab780896264ae4 206-13172 \n", - "97 6a10ede3fa469c8b4d9bf761946ed20a 204-13172 \n", - "3644 9b4a79b5b21e2ca01ee032b1c39ca3c5 3B \n", - "3625 e939d633652e2af6d3aa82d28a042dbf 07959480-2a40-4a51-92ac-8ca2029d5f4f \n", - "268 4e1e59e768bfbf8c30d0369f89e27aa6 603-13172 \n", - "576 2e0e2720bd3b6c1510ab780896264ae4 206-13172 \n", - "262 4e1e59e768bfbf8c30d0369f89e27aa6 603-13172 \n", - "592 2e0e2720bd3b6c1510ab780896264ae4 206-13172 \n", + " route_long_name 
route_type_str \\\n", + "5599 Local Rail \n", + "5582 Local Rail \n", + "728 Hollis Bus \n", + "4362 Metro Local Line Bus \n", + "4356 Metro Local Line Bus \n", + "4337 Metro Local Line Bus \n", + "2911 Route 9 Bus \n", + "2588 Route 9 Bus \n", + "2631 Route 9 Bus \n", + "2625 Route 9 Bus \n", + "2611 Route 9 Bus \n", + "2600 Route 9 Bus \n", + "2599 Route 9 Bus \n", + "2597 Route 9 Bus \n", + "2596 Route 9 Bus \n", + "2593 Route 9 Bus \n", + "2592 Route 9 Bus \n", + "2591 Route 9 Bus \n", + "2590 Route 9 Bus \n", + "2587 Route 9 Bus \n", + "2627 Route 9 Bus \n", + "2606 Route 9 Bus \n", + "2589 Route 9 Bus \n", + "2583 Route 9 Bus \n", + "2624 Route 9 Bus \n", + "2633 Route 9 Bus \n", + "2623 Route 9 Bus \n", + "2622 Route 9 Bus \n", + "4284 River/Harvey West/Emeline Bus \n", + "2585 Route 9 Bus \n", "\n", - " stop_id route_primary_direction all_trips \\\n", - "5599 70022 Eastbound 1 \n", - "5582 70032 Eastbound 1 \n", - "728 855335 Northbound 44 \n", - "4362 104720 Southbound 4 \n", - "4356 11013 Southbound 4 \n", - "4337 11031 Southbound 4 \n", - "2911 2307469 Westbound 13 \n", - "2588 2307692 Westbound 15 \n", - "2631 2307698 Westbound 15 \n", - "2625 2307716 Westbound 15 \n", - "2611 2307718 Westbound 15 \n", - "2600 2307356 Westbound 15 \n", - "2599 2307466 Westbound 15 \n", - "2597 2307467 Westbound 15 \n", - "2596 2307695 Westbound 15 \n", - "2593 2307686 Westbound 15 \n", - "2592 2307687 Westbound 15 \n", - "2591 2307689 Westbound 15 \n", - "2590 2307690 Westbound 15 \n", - "2587 2307693 Westbound 15 \n", - "2627 2307699 Westbound 15 \n", - "2606 2307355 Westbound 15 \n", - "2589 2307691 Westbound 15 \n", - "2583 2307468 Westbound 15 \n", - "2624 2307696 Westbound 15 \n", - "2633 2307697 Westbound 15 \n", - "2623 2307717 Westbound 15 \n", - "2622 2307719 Westbound 15 \n", - "4284 1216 Eastbound 5 \n", - "2585 2307694 Westbound 15 \n", - "2621 2307720 Westbound 15 \n", - "2620 3869709 Westbound 15 \n", - "2619 3869710 Westbound 15 \n", - "2626 2307700 
Westbound 15 \n", - "4143 a1d203a6-cfdd-40a1-af2f-0fa502ea65b4 Westbound 6 \n", - "269 8066 Northbound 67 \n", - "267 3362 Northbound 67 \n", - "3866 1214 Eastbound 7 \n", - "3867 1215 Eastbound 7 \n", - "99 3963 Northbound 92 \n", - "98 7092 Northbound 92 \n", - "726 14032 Southbound 44 \n", - "716 14027 Southbound 45 \n", - "97 5652 Northbound 93 \n", - "3644 1388 Eastbound 8 \n", - "3625 c8a9bfc8-7e84-483b-95bc-02a1494c3ae3 Westbound 8 \n", - "268 8689 Northbound 67 \n", - "576 14078 Southbound 51 \n", - "262 2203 Northbound 68 \n", - "592 15437 Southbound 51 \n", + " shape_array_key route_id stop_id \\\n", + "5599 8c4de04e7398d418c12cc1541651e951 L1 70022 \n", + "5582 8c4de04e7398d418c12cc1541651e951 L1 70032 \n", + "728 0628e405f93c2d0b5e3e68a7115857d4 Hollis 855335 \n", + "4362 0688a14c97a2ebfe90f5674c1262d741 217-13172 104720 \n", + "4356 0688a14c97a2ebfe90f5674c1262d741 217-13172 11013 \n", + "4337 0688a14c97a2ebfe90f5674c1262d741 217-13172 11031 \n", + "2911 60da59c7000ea5dcb5f845d8fa227f14 2042 2307469 \n", + "2588 60da59c7000ea5dcb5f845d8fa227f14 2042 2307692 \n", + "2631 60da59c7000ea5dcb5f845d8fa227f14 2042 2307698 \n", + "2625 60da59c7000ea5dcb5f845d8fa227f14 2042 2307716 \n", + "2611 60da59c7000ea5dcb5f845d8fa227f14 2042 2307718 \n", + "2600 60da59c7000ea5dcb5f845d8fa227f14 2042 2307356 \n", + "2599 60da59c7000ea5dcb5f845d8fa227f14 2042 2307466 \n", + "2597 60da59c7000ea5dcb5f845d8fa227f14 2042 2307467 \n", + "2596 60da59c7000ea5dcb5f845d8fa227f14 2042 2307695 \n", + "2593 60da59c7000ea5dcb5f845d8fa227f14 2042 2307686 \n", + "2592 60da59c7000ea5dcb5f845d8fa227f14 2042 2307687 \n", + "2591 60da59c7000ea5dcb5f845d8fa227f14 2042 2307689 \n", + "2590 60da59c7000ea5dcb5f845d8fa227f14 2042 2307690 \n", + "2587 60da59c7000ea5dcb5f845d8fa227f14 2042 2307693 \n", + "2627 60da59c7000ea5dcb5f845d8fa227f14 2042 2307699 \n", + "2606 60da59c7000ea5dcb5f845d8fa227f14 2042 2307355 \n", + "2589 60da59c7000ea5dcb5f845d8fa227f14 2042 2307691 \n", + "2583 
60da59c7000ea5dcb5f845d8fa227f14 2042 2307468 \n", + "2624 60da59c7000ea5dcb5f845d8fa227f14 2042 2307696 \n", + "2633 60da59c7000ea5dcb5f845d8fa227f14 2042 2307697 \n", + "2623 60da59c7000ea5dcb5f845d8fa227f14 2042 2307717 \n", + "2622 60da59c7000ea5dcb5f845d8fa227f14 2042 2307719 \n", + "4284 56f57a645c3aff1c35f50d5ea1385a75 4 1216 \n", + "2585 60da59c7000ea5dcb5f845d8fa227f14 2042 2307694 \n", "\n", - " bunched_trips pct_trips_bunched \n", - "5599 1.00 100.00 \n", - "5582 1.00 100.00 \n", - "728 12.00 27.27 \n", - "4362 1.00 25.00 \n", - "4356 1.00 25.00 \n", - "4337 1.00 25.00 \n", - "2911 3.00 23.08 \n", - "2588 3.00 20.00 \n", - "2631 3.00 20.00 \n", - "2625 3.00 20.00 \n", - "2611 3.00 20.00 \n", - "2600 3.00 20.00 \n", - "2599 3.00 20.00 \n", - "2597 3.00 20.00 \n", - "2596 3.00 20.00 \n", - "2593 3.00 20.00 \n", - "2592 3.00 20.00 \n", - "2591 3.00 20.00 \n", - "2590 3.00 20.00 \n", - "2587 3.00 20.00 \n", - "2627 3.00 20.00 \n", - "2606 3.00 20.00 \n", - "2589 3.00 20.00 \n", - "2583 3.00 20.00 \n", - "2624 3.00 20.00 \n", - "2633 3.00 20.00 \n", - "2623 3.00 20.00 \n", - "2622 3.00 20.00 \n", - "4284 1.00 20.00 \n", - "2585 3.00 20.00 \n", - "2621 3.00 20.00 \n", - "2620 3.00 20.00 \n", - "2619 3.00 20.00 \n", - "2626 3.00 20.00 \n", - "4143 1.00 16.67 \n", - "269 10.00 14.93 \n", - "267 10.00 14.93 \n", - "3866 1.00 14.29 \n", - "3867 1.00 14.29 \n", - "99 13.00 14.13 \n", - "98 13.00 14.13 \n", - "726 6.00 13.64 \n", - "716 6.00 13.33 \n", - "97 12.00 12.90 \n", - "3644 1.00 12.50 \n", - "3625 1.00 12.50 \n", - "268 8.00 11.94 \n", - "576 6.00 11.76 \n", - "262 8.00 11.76 \n", - "592 6.00 11.76 " + " route_primary_direction all_trips bunched_trips pct_trips_bunched \n", + "5599 Eastbound 1 1.00 100.00 \n", + "5582 Eastbound 1 1.00 100.00 \n", + "728 Northbound 44 12.00 27.27 \n", + "4362 Southbound 4 1.00 25.00 \n", + "4356 Southbound 4 1.00 25.00 \n", + "4337 Southbound 4 1.00 25.00 \n", + "2911 Westbound 13 3.00 23.08 \n", + "2588 Westbound 15 3.00 
20.00 \n", + "2631 Westbound 15 3.00 20.00 \n", + "2625 Westbound 15 3.00 20.00 \n", + "2611 Westbound 15 3.00 20.00 \n", + "2600 Westbound 15 3.00 20.00 \n", + "2599 Westbound 15 3.00 20.00 \n", + "2597 Westbound 15 3.00 20.00 \n", + "2596 Westbound 15 3.00 20.00 \n", + "2593 Westbound 15 3.00 20.00 \n", + "2592 Westbound 15 3.00 20.00 \n", + "2591 Westbound 15 3.00 20.00 \n", + "2590 Westbound 15 3.00 20.00 \n", + "2587 Westbound 15 3.00 20.00 \n", + "2627 Westbound 15 3.00 20.00 \n", + "2606 Westbound 15 3.00 20.00 \n", + "2589 Westbound 15 3.00 20.00 \n", + "2583 Westbound 15 3.00 20.00 \n", + "2624 Westbound 15 3.00 20.00 \n", + "2633 Westbound 15 3.00 20.00 \n", + "2623 Westbound 15 3.00 20.00 \n", + "2622 Westbound 15 3.00 20.00 \n", + "4284 Eastbound 5 1.00 20.00 \n", + "2585 Westbound 15 3.00 20.00 " ] }, - "execution_count": 121, + "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "transit_matters_m2.sort_values(by = [\"pct_trips_bunched\"], ascending = False).head(50)" + "transit_matters_m2.sort_values(by=[\"pct_trips_bunched\"], ascending=False).head(30)" + ] + }, + { + "cell_type": "markdown", + "id": "710770ad-2d1a-4636-9667-02d320b689e8", + "metadata": {}, + "source": [ + "#### What to do if order is switched? 
\n", + "* Row 91379: the -1 day + 23:15 is confusing to me" ] }, { "cell_type": "code", - "execution_count": 125, - "id": "184ea397-489e-49b1-a325-aa3ee74f5e38", + "execution_count": 145, + "id": "56e4a936-215b-4bdd-8f45-e86b613198bc", + "metadata": {}, + "outputs": [], + "source": [ + "preview_cols = [\n", + " \"converted_rt_arrival\",\n", + " \"actual_arrival_lag\",\n", + " \"converted_schd_arrival\",\n", + " \"scheduled_arrival_lag\",\n", + " \"pct_actual_schd_headway\",\n", + " \"bunched_y_n\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "1e24cb76-4f68-4377-a0df-cedb21904b6e", "metadata": {}, "outputs": [], "source": [ "example2 = transit_matters_df1.loc[\n", " (transit_matters_df1.stop_id == \"2307719\")\n", - " & (\n", - " transit_matters_df1.organization_name\n", - " == \"City of Visalia\"\n", - " )\n", + " & (transit_matters_df1.organization_name == \"City of Visalia\")\n", " & (transit_matters_df1.route_id == \"2042\")\n", " & (transit_matters_df1.shape_array_key == \"60da59c7000ea5dcb5f845d8fa227f14\")\n", "]" @@ -5108,7 +4603,7 @@ }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 146, "id": "8b34f0a6-6a2b-4e56-b0fe-bd0f57b53db8", "metadata": {}, "outputs": [ @@ -5350,356 +4845,1737 @@ "91383 -1 days +23:15:00 -1.00 bunched " ] }, - "execution_count": 126, + "execution_count": 146, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "example2[\n", - " [\n", - " \"converted_rt_arrival\",\n", - " \"actual_arrival_lag\",\n", - " \"converted_schd_arrival\",\n", - " \"scheduled_arrival_lag\",\n", - " \"pct_actual_schd_headway\",\n", - " \"bunched_y_n\",\n", - " ]\n", - "]" + "example2[preview_cols]" ] }, { - "cell_type": "markdown", - "id": "b0579e78-2a95-4d8b-9761-2824aa39a8eb", + "cell_type": "code", + "execution_count": 111, + "id": "75f8a87a-ef1b-4227-866a-e448fa62f14d", "metadata": {}, + "outputs": [], "source": [ - "### Use 2 minute benchmark\n", - "* 
[Source](https://static1.squarespace.com/static/533b9a24e4b01d79d0ae4376/t/645e82de1f570b31497c44dc/1683915486889/TransitMatters-Headwaymanagement.pdf)\n", - "* Justifying the use of\n", - "headway maintenance. For example, in April\n", - "2022 the 66 bus significantly bunched around\n", - "several stops. When bunching is defined as\n", - "buses that run within two minutes or less of\n", - "each other, inbound buses towards Nubian\n", - "Square bunched 10% of the time at Brigham\n", - "Circle, 9% at Brookline Village and Roxbury\n", - "Crossing, and 8% of the time at Coolidge\n", - "Corner. Bunching is even more dramatic\n", - "outbound towards Harvard Square where\n", - "buses bunched over 35% of the time at Winship\n", - "St, 13% at Coolidge Corner and Harvard Ave at\n", - "Commonwealth Ave, and 12% at North Harvard\n", - "St at Western Ave. View more data about bus\n", - "bunching through the TransitMatters Data\n", - "Dashboard here.\n", - "\n", - "* To Do: add back in route & operator information" + "example3 = transit_matters_df1.loc[\n", + " (transit_matters_df1.stop_id == \"2307698\")\n", + " & (transit_matters_df1.organization_name == \"City of Visalia\")\n", + " & (transit_matters_df1.route_id == \"2042\")\n", + " & (transit_matters_df1.shape_array_key == \"60da59c7000ea5dcb5f845d8fa227f14\")\n", + "]" ] }, { "cell_type": "code", - "execution_count": 114, - "id": "e0706e7e-0d56-43b2-bf3c-4205e9277c64", + "execution_count": 153, + "id": "a852b6c9-9d08-4f57-87fd-fff3657ee2fc", "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'rt_stop_times5' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[114], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m two_minutess_df \u001b[38;5;241m=\u001b[39m 
\u001b[43mrt_stop_times5\u001b[49m\u001b[38;5;241m.\u001b[39mcopy()\n", - "\u001b[0;31mNameError\u001b[0m: name 'rt_stop_times5' is not defined" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
converted_rt_arrivalactual_arrival_lagconverted_schd_arrivalscheduled_arrival_lagpct_actual_schd_headwaybunched_y_n
916362024-05-22 06:59:51NaT2024-05-22 06:59:00NaTNaNnot bunched
916372024-05-22 07:57:190 days 00:57:282024-05-22 07:44:000 days 00:45:001.28not bunched
916382024-05-22 08:36:030 days 00:38:442024-05-22 08:29:000 days 00:45:000.86not bunched
916392024-05-22 09:20:370 days 00:44:342024-05-22 09:14:000 days 00:45:000.99not bunched
916402024-05-22 10:01:360 days 00:40:592024-05-22 09:59:000 days 00:45:000.91not bunched
916412024-05-22 10:50:300 days 00:48:542024-05-22 10:44:000 days 00:45:001.09not bunched
916422024-05-22 11:30:120 days 00:39:422024-05-22 11:29:000 days 00:45:000.88not bunched
916432024-05-22 12:22:420 days 00:52:302024-05-22 12:14:000 days 00:45:001.17not bunched
916442024-05-22 12:59:070 days 00:36:252024-05-22 12:59:000 days 00:45:000.81not bunched
916452024-05-22 13:46:210 days 00:47:142024-05-22 13:44:000 days 00:45:001.05not bunched
916462024-05-22 14:31:100 days 00:44:492024-05-22 14:29:000 days 00:45:001.00not bunched
916472024-05-22 15:17:010 days 00:45:512024-05-22 15:14:000 days 00:45:001.02not bunched
916482024-05-22 16:44:010 days 01:27:002024-05-22 16:44:000 days 01:30:000.97not bunched
916492024-05-22 17:35:320 days 00:51:312024-05-22 15:59:00-1 days +23:15:00-1.14bunched
916502024-05-22 18:15:020 days 00:39:302024-05-22 18:14:000 days 02:15:000.29not bunched
916512024-05-22 19:03:140 days 00:48:122024-05-22 17:29:00-1 days +23:15:00-1.07bunched
916522024-05-22 19:44:500 days 00:41:362024-05-22 19:44:000 days 02:15:000.31not bunched
916532024-05-22 20:32:330 days 00:47:432024-05-22 18:59:00-1 days +23:15:00-1.06bunched
\n", + "
" + ], + "text/plain": [ + " converted_rt_arrival actual_arrival_lag converted_schd_arrival \\\n", + "91636 2024-05-22 06:59:51 NaT 2024-05-22 06:59:00 \n", + "91637 2024-05-22 07:57:19 0 days 00:57:28 2024-05-22 07:44:00 \n", + "91638 2024-05-22 08:36:03 0 days 00:38:44 2024-05-22 08:29:00 \n", + "91639 2024-05-22 09:20:37 0 days 00:44:34 2024-05-22 09:14:00 \n", + "91640 2024-05-22 10:01:36 0 days 00:40:59 2024-05-22 09:59:00 \n", + "91641 2024-05-22 10:50:30 0 days 00:48:54 2024-05-22 10:44:00 \n", + "91642 2024-05-22 11:30:12 0 days 00:39:42 2024-05-22 11:29:00 \n", + "91643 2024-05-22 12:22:42 0 days 00:52:30 2024-05-22 12:14:00 \n", + "91644 2024-05-22 12:59:07 0 days 00:36:25 2024-05-22 12:59:00 \n", + "91645 2024-05-22 13:46:21 0 days 00:47:14 2024-05-22 13:44:00 \n", + "91646 2024-05-22 14:31:10 0 days 00:44:49 2024-05-22 14:29:00 \n", + "91647 2024-05-22 15:17:01 0 days 00:45:51 2024-05-22 15:14:00 \n", + "91648 2024-05-22 16:44:01 0 days 01:27:00 2024-05-22 16:44:00 \n", + "91649 2024-05-22 17:35:32 0 days 00:51:31 2024-05-22 15:59:00 \n", + "91650 2024-05-22 18:15:02 0 days 00:39:30 2024-05-22 18:14:00 \n", + "91651 2024-05-22 19:03:14 0 days 00:48:12 2024-05-22 17:29:00 \n", + "91652 2024-05-22 19:44:50 0 days 00:41:36 2024-05-22 19:44:00 \n", + "91653 2024-05-22 20:32:33 0 days 00:47:43 2024-05-22 18:59:00 \n", + "\n", + " scheduled_arrival_lag pct_actual_schd_headway bunched_y_n \n", + "91636 NaT NaN not bunched \n", + "91637 0 days 00:45:00 1.28 not bunched \n", + "91638 0 days 00:45:00 0.86 not bunched \n", + "91639 0 days 00:45:00 0.99 not bunched \n", + "91640 0 days 00:45:00 0.91 not bunched \n", + "91641 0 days 00:45:00 1.09 not bunched \n", + "91642 0 days 00:45:00 0.88 not bunched \n", + "91643 0 days 00:45:00 1.17 not bunched \n", + "91644 0 days 00:45:00 0.81 not bunched \n", + "91645 0 days 00:45:00 1.05 not bunched \n", + "91646 0 days 00:45:00 1.00 not bunched \n", + "91647 0 days 00:45:00 1.02 not bunched \n", + "91648 0 days 01:30:00 
0.97 not bunched \n", + "91649 -1 days +23:15:00 -1.14 bunched \n", + "91650 0 days 02:15:00 0.29 not bunched \n", + "91651 -1 days +23:15:00 -1.07 bunched \n", + "91652 0 days 02:15:00 0.31 not bunched \n", + "91653 -1 days +23:15:00 -1.06 bunched " + ] + }, + "execution_count": 153, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "two_minutess_df = rt_stop_times5.copy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c2973b95-3207-47d8-9485-65e9544b0163", - "metadata": {}, - "outputs": [], - "source": [ - "two_minutess_df.columns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a0c04034-7c1a-49c9-9270-8bf9af8feac1", - "metadata": {}, - "outputs": [], - "source": [ - "two_minutess_df[\"actual_headway_min\"] = two_minutess_df.rt_arrival_sec / 60" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bd9b8ee4-aab3-42c9-b29a-9679f5494ffa", - "metadata": {}, - "outputs": [], - "source": [ - "two_minutess_df[\"bunched_y_n\"] = np.where(\n", - " two_minutess_df[\"actual_headway_min\"] <= 2, \"bunched\", \"not bunched\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "83382d9f-09c5-4bf2-a55b-41dc541dee51", - "metadata": {}, - "outputs": [], - "source": [ - "two_minutess_df.info()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dcd302cb-5f30-4318-8b28-cb29f6c376cd", - "metadata": {}, - "outputs": [], - "source": [ - "two_minutess_df.bunched_y_n.value_counts()" + "example3[preview_cols]" ] }, { "cell_type": "markdown", - "id": "626c2f22-4d45-4fe2-814d-773ae9dbd843", - "metadata": {}, - "source": [ - "#### Same code as Transit Matters Approach" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f2532ed7-194c-4b2d-841c-a2282379a44a", - "metadata": {}, - "outputs": [], - "source": [ - "two_minutes_agg1 = (\n", - " two_minutess_df.groupby(\n", - " [\n", - " \"schedule_gtfs_dataset_key\",\n", - " 
\"route_long_name\",\n", - " \"shape_array_key\",\n", - " \"route_id\",\n", - " \"stop_id\",\n", - " \"direction_id\",\n", - " \"route_primary_direction\",\n", - " \"bunched_y_n\",\n", - " ]\n", - " )\n", - " .agg({\"trip_instance_key\": \"nunique\"})\n", - " .reset_index()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "666be796-5edd-42aa-bffb-a5105d0753f5", + "id": "b0579e78-2a95-4d8b-9761-2824aa39a8eb", "metadata": {}, - "outputs": [], "source": [ - "bunched_only_two_min = (\n", - " two_minutes_agg1.loc[two_minutes_agg1.bunched_y_n == \"bunched\"]\n", - " .reset_index(drop=True)\n", - " .rename(columns={\"trip_instance_key\": \"bunched_trips\"})\n", - ")" + "### Use 2 minute benchmark\n", + "* [Source](https://static1.squarespace.com/static/533b9a24e4b01d79d0ae4376/t/645e82de1f570b31497c44dc/1683915486889/TransitMatters-Headwaymanagement.pdf)\n", + "* Justifying the use of\n", + "headway maintenance. For example, in April\n", + "2022 the 66 bus significantly bunched around\n", + "several stops. When bunching is defined as\n", + "buses that run within two minutes or less of\n", + "each other, inbound buses towards Nubian\n", + "Square bunched 10% of the time at Brigham\n", + "Circle, 9% at Brookline Village and Roxbury\n", + "Crossing, and 8% of the time at Coolidge\n", + "Corner. Bunching is even more dramatic\n", + "outbound towards Harvard Square where\n", + "buses bunched over 35% of the time at Winship\n", + "St, 13% at Coolidge Corner and Harvard Ave at\n", + "Commonwealth Ave, and 12% at North Harvard\n", + "St at Western Ave. 
View more data about bus\n", + "bunching through the TransitMatters Data\n", + "Dashboard here.\n", + "\n", + "* To Do: add back in route & operator information" ] }, { "cell_type": "code", - "execution_count": null, - "id": "37519780-57b0-47c8-a911-aa53ff7eb4d6", + "execution_count": 113, + "id": "e0706e7e-0d56-43b2-bf3c-4205e9277c64", "metadata": {}, "outputs": [], "source": [ - "# I want to do a left merge because I'm only interested in trips that bunched.\n", - "bunched_only_two_min = pd.merge(\n", - " bunched_only_two_min,\n", - " transit_matters_all_trips,\n", - " on=[\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"route_long_name\",\n", - " \"shape_array_key\",\n", - " \"route_id\",\n", - " \"stop_id\",\n", - " \"direction_id\",\n", - " \"route_primary_direction\",\n", - " ],\n", - " how=\"left\",\n", - ")" + "two_minutes_df = rt_stop_times4.copy()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "993d3341-bb43-4553-852a-341b8726b710", + "execution_count": 114, + "id": "02ee792b-38f3-4b41-9be9-6672c757bb39", "metadata": {}, "outputs": [], "source": [ - "bunched_only_two_min[\"pct_trips_bunched\"] = (\n", - " bunched_only_two_min.bunched_trips / bunched_only_two_min.all_trips * 100\n", - ")" + "two_minutes_df[\"rt_mins\"] = (\n", + " two_minutes_df[\"actual_arrival_lag\"].dt.total_seconds()\n", + ") / 60" ] }, { "cell_type": "code", - "execution_count": null, - "id": "91b29863-5465-48c9-8257-bf8d84457b11", + "execution_count": 115, + "id": "b1f93a7e-aa74-4d00-a2fa-21352b4cae9d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 110343.00\n", + "mean 36.45\n", + "std 60.42\n", + "min 0.00\n", + "1% 2.50\n", + "2% 4.22\n", + "5% 7.17\n", + "10% 9.70\n", + "50% 24.03\n", + "90% 62.08\n", + "95% 70.05\n", + "98% 127.71\n", + "99% 228.59\n", + "max 1373.35\n", + "Name: rt_mins, dtype: float64" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - 
"bunched_only_two_min = bunched_only_two_min.drop(columns=[\"all_trips\"])" + "two_minutes_df.rt_mins.describe(percentiles)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "b2aed36d-818a-4328-9333-7d2cac1614a1", + "execution_count": 116, + "id": "bd1c9046-579e-424a-ac8e-349028dc8738", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_idstop_idstop_sequencescheduled_arrival_secschedule_gtfs_dataset_keytrip_instance_keyrt_arrival_secroute_idshape_array_keyfeed_keyroute_long_namedirection_idroute_typeroute_primary_directionmed_headway_minutesorganization_namenamecaltrans_districtservice_dateroute_type_strscheduled_arrival_sec_copyconverted_schd_arrivalconverted_rt_arrivaldelay_minactual_arrival_lagscheduled_arrival_lagrt_mins
010204001252406-DEC237093286880.000666caf3ec1ecc96b74f4477ee4bc9392d6ea456f6d155e566e41f01c1b46370412204-131726a10ede3fa469c8b4d9bf761946ed20a608992664173210532aa3e6cc573be2fMetro Local Line0.003Northbound14.85Los Angeles County Metropolitan Transportation AuthorityLA Metro Bus Schedule07 - Los Angeles2024-05-22Bus86880.002024-05-22 00:08:002024-05-22 00:06:52-1.13NaTNaTNaN
110204001252439-DEC237093288860.000666caf3ec1ecc96b74f4477ee4bc939aed84185568efda59a9ce7342d9192022662204-131726a10ede3fa469c8b4d9bf761946ed20a608992664173210532aa3e6cc573be2fMetro Local Line0.003Northbound14.85Los Angeles County Metropolitan Transportation AuthorityLA Metro Bus Schedule07 - Los Angeles2024-05-22Bus88860.002024-05-22 00:41:002024-05-22 00:44:223.370 days 00:37:300 days 00:33:0037.50
210204001252509-DEC237093290660.000666caf3ec1ecc96b74f4477ee4bc93979aa575337434ff1eeb332de268e44c24349204-131726a10ede3fa469c8b4d9bf761946ed20a608992664173210532aa3e6cc573be2fMetro Local Line0.003Northbound14.85Los Angeles County Metropolitan Transportation AuthorityLA Metro Bus Schedule07 - Los Angeles2024-05-22Bus90660.002024-05-22 01:11:002024-05-22 01:12:291.480 days 00:28:070 days 00:30:0028.12
\n", + "
" + ], + "text/plain": [ + " trip_id stop_id stop_sequence scheduled_arrival_sec \\\n", + "0 10204001252406-DEC23 7093 2 86880.00 \n", + "1 10204001252439-DEC23 7093 2 88860.00 \n", + "2 10204001252509-DEC23 7093 2 90660.00 \n", + "\n", + " schedule_gtfs_dataset_key trip_instance_key \\\n", + "0 0666caf3ec1ecc96b74f4477ee4bc939 2d6ea456f6d155e566e41f01c1b46370 \n", + "1 0666caf3ec1ecc96b74f4477ee4bc939 aed84185568efda59a9ce7342d919202 \n", + "2 0666caf3ec1ecc96b74f4477ee4bc939 79aa575337434ff1eeb332de268e44c2 \n", + "\n", + " rt_arrival_sec route_id shape_array_key \\\n", + "0 412 204-13172 6a10ede3fa469c8b4d9bf761946ed20a \n", + "1 2662 204-13172 6a10ede3fa469c8b4d9bf761946ed20a \n", + "2 4349 204-13172 6a10ede3fa469c8b4d9bf761946ed20a \n", + "\n", + " feed_key route_long_name direction_id \\\n", + "0 608992664173210532aa3e6cc573be2f Metro Local Line 0.00 \n", + "1 608992664173210532aa3e6cc573be2f Metro Local Line 0.00 \n", + "2 608992664173210532aa3e6cc573be2f Metro Local Line 0.00 \n", + "\n", + " route_type route_primary_direction med_headway_minutes \\\n", + "0 3 Northbound 14.85 \n", + "1 3 Northbound 14.85 \n", + "2 3 Northbound 14.85 \n", + "\n", + " organization_name \\\n", + "0 Los Angeles County Metropolitan Transportation Authority \n", + "1 Los Angeles County Metropolitan Transportation Authority \n", + "2 Los Angeles County Metropolitan Transportation Authority \n", + "\n", + " name caltrans_district service_date route_type_str \\\n", + "0 LA Metro Bus Schedule 07 - Los Angeles 2024-05-22 Bus \n", + "1 LA Metro Bus Schedule 07 - Los Angeles 2024-05-22 Bus \n", + "2 LA Metro Bus Schedule 07 - Los Angeles 2024-05-22 Bus \n", + "\n", + " scheduled_arrival_sec_copy converted_schd_arrival converted_rt_arrival \\\n", + "0 86880.00 2024-05-22 00:08:00 2024-05-22 00:06:52 \n", + "1 88860.00 2024-05-22 00:41:00 2024-05-22 00:44:22 \n", + "2 90660.00 2024-05-22 01:11:00 2024-05-22 01:12:29 \n", + "\n", + " delay_min actual_arrival_lag scheduled_arrival_lag 
rt_mins \n", + "0 -1.13 NaT NaT NaN \n", + "1 3.37 0 days 00:37:30 0 days 00:33:00 37.50 \n", + "2 1.48 0 days 00:28:07 0 days 00:30:00 28.12 " + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "bunched_only_two_min.head(2)" + "two_minutes_df.head(3)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "9cb116be-b313-40e3-a0ef-a510b42da6e1", + "execution_count": 117, + "id": "2def9283-d995-4001-b412-0fa03a855cd5", "metadata": {}, "outputs": [], "source": [ - "# Need to do a left merge on all trips for the stops that don't have bunching.\n", - "final_two_minute = pd.merge(\n", - " transit_matters_all_trips,\n", - " bunched_only_two_min,\n", - " on=[\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"route_long_name\",\n", - " \"shape_array_key\",\n", - " \"route_id\",\n", - " \"stop_id\",\n", - " \"direction_id\",\n", - " \"route_primary_direction\",\n", - " ],\n", - " how=\"left\",\n", + "two_minutes_df[\"bunched_y_n\"] = np.where(\n", + " two_minutes_df[\"rt_mins\"] <= 2, \"bunched\", \"not bunched\"\n", ")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "961b9cfb-cb94-485c-bcf5-92e9b51024d4", + "execution_count": 118, + "id": "dcd302cb-5f30-4318-8b28-cb29f6c376cd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "not bunched 115586\n", + "bunched 867\n", + "Name: bunched_y_n, dtype: int64" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "final_two_minute.shape" + "two_minutes_df.bunched_y_n.value_counts()" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "cfefaf19-4eaa-4d10-a83d-79d2fe5d9c62", + "cell_type": "markdown", + "id": "626c2f22-4d45-4fe2-814d-773ae9dbd843", "metadata": {}, - "outputs": [], "source": [ - "final_two_minute = final_two_minute.drop(columns=[\"bunched_y_n\"])" + "#### Same code as Transit Matters Approach" ] }, { "cell_type": "code", - 
"execution_count": null, - "id": "cdae7af9-696e-42dd-8c88-ee81b4e0bfef", + "execution_count": 119, + "id": "9988fc07-bd19-4a89-acd4-044bc44b4c81", "metadata": {}, "outputs": [], "source": [ - "final_two_minute = final_two_minute.fillna(0)" + "transit_matters_agg.append(\"bunched_y_n\")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "28052971-36d9-447a-89b2-ab700ee3e731", + "execution_count": 120, + "id": "27a14947-ba00-44d5-b992-213285259c0c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['caltrans_district',\n", + " 'organization_name',\n", + " 'route_long_name',\n", + " 'route_type_str',\n", + " 'shape_array_key',\n", + " 'route_id',\n", + " 'stop_id',\n", + " 'route_primary_direction',\n", + " 'bunched_y_n']" + ] + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "final_two_minute.head()" + "transit_matters_agg" ] }, { "cell_type": "code", - "execution_count": null, - "id": "43f242ec-26e7-4acb-a2a0-beaba47feea7", + "execution_count": 123, + "id": "f2532ed7-194c-4b2d-841c-a2282379a44a", "metadata": {}, "outputs": [], "source": [ - "bunched = final_two_minute.loc[final_two_minute.pct_trips_bunched != 0]" + "two_minutes_agg1 = (\n", + " two_minutes_df.groupby(transit_matters_agg)\n", + " .agg({\"trip_instance_key\": \"nunique\"})\n", + " .reset_index()\n", + ").rename(columns={\"trip_instance_key\": \"all_trips\"})" ] }, { "cell_type": "code", - "execution_count": null, - "id": "e15e7fa0-804c-4e41-8ed4-2a575c4c52bd", + "execution_count": 124, + "id": "666be796-5edd-42aa-bffb-a5105d0753f5", "metadata": {}, "outputs": [], "source": [ - "bunched.all_trips.describe()" + "bunched_only_two_min = (\n", + " two_minutes_agg1.loc[two_minutes_agg1.bunched_y_n == \"bunched\"]\n", + " .reset_index(drop=True)\n", + " .rename(columns={\"all_trips\": \"bunched_trips\"})\n", + " .drop(columns=[\"bunched_y_n\"])\n", + ")" ] }, { "cell_type": "code", - "execution_count": 
null, - "id": "536da591-f11b-4225-9d14-4bf046289ae2", + "execution_count": 125, + "id": "4e0b4c0f-c987-4f5e-9746-7cced56bd601", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caltrans_districtorganization_nameroute_long_nameroute_type_strshape_array_keyroute_idstop_idroute_primary_directionbunched_trips
003 - MarysvilleYolo County Transportation DistrictROUTE 215 WBBuse939d633652e2af6d3aa82d28a042dbf07959480-2a40-4a51-92ac-8ca2029d5f4fa1d203a6-cfdd-40a1-af2f-0fa502ea65b4Westbound1
103 - MarysvilleYolo County Transportation DistrictROUTE 215 WBBuse939d633652e2af6d3aa82d28a042dbf07959480-2a40-4a51-92ac-8ca2029d5f4fc8a9bfc8-7e84-483b-95bc-02a1494c3ae3Westbound1
\n", + "
" + ], + "text/plain": [ + " caltrans_district organization_name route_long_name \\\n", + "0 03 - Marysville Yolo County Transportation District ROUTE 215 WB \n", + "1 03 - Marysville Yolo County Transportation District ROUTE 215 WB \n", + "\n", + " route_type_str shape_array_key \\\n", + "0 Bus e939d633652e2af6d3aa82d28a042dbf \n", + "1 Bus e939d633652e2af6d3aa82d28a042dbf \n", + "\n", + " route_id stop_id \\\n", + "0 07959480-2a40-4a51-92ac-8ca2029d5f4f a1d203a6-cfdd-40a1-af2f-0fa502ea65b4 \n", + "1 07959480-2a40-4a51-92ac-8ca2029d5f4f c8a9bfc8-7e84-483b-95bc-02a1494c3ae3 \n", + "\n", + " route_primary_direction bunched_trips \n", + "0 Westbound 1 \n", + "1 Westbound 1 " + ] + }, + "execution_count": 125, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "bunched.loc[\n", - " (bunched.schedule_gtfs_dataset_key == \"7cc0cb1871dfd558f11a2885c145d144\")\n", - " & (bunched.shape_array_key == \"955e2fc8f9f8a4be2c67c7212be874f6\")\n", - " & (bunched.route_id == \"1\")\n", - " & (bunched.direction_id == 1)\n", - " & (bunched.stop_id == \"13853\")\n", - "]" + "bunched_only_two_min.head(2)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "5cb6b93b-6115-4502-b74b-0b7bf48612ea", + "execution_count": 126, + "id": "1c112644-81a9-4c59-99d7-c1ec98a1da8c", "metadata": {}, "outputs": [], "source": [ - "rt_stop_times5.loc[\n", - " (rt_stop_times5.schedule_gtfs_dataset_key == \"7cc0cb1871dfd558f11a2885c145d144\")\n", - " & (rt_stop_times5.shape_array_key == \"955e2fc8f9f8a4be2c67c7212be874f6\")\n", - " & (rt_stop_times5.route_id == \"1\")\n", - " & (rt_stop_times5.direction_id == 1)\n", - " & (rt_stop_times5.stop_id == \"13853\")\n", - "][[\"scheduled_arrival_sec2\", \"rt_arrival_sec\", \"actual_headway\", \"schd_headway\"]]" + "transit_matters_agg.remove(\"bunched_y_n\")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "2607398c-f46c-4899-a634-acad3f6c60e2", - "metadata": { - "scrolled": true, - "tags": [] - }, + 
"execution_count": 127, + "id": "82b375ea-f7cd-4567-83e6-7143ab7c9e6d", + "metadata": {}, + "outputs": [], + "source": [ + "two_minutes_agg1 = two_minutes_agg1.drop(columns=[\"bunched_y_n\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "id": "9cb116be-b313-40e3-a0ef-a510b42da6e1", + "metadata": {}, + "outputs": [], + "source": [ + "# Need to do a left merge on all trips for the stops that don't have bunching.\n", + "final_two_minute = pd.merge(\n", + " two_minutes_agg1,\n", + " bunched_only_two_min,\n", + " on=transit_matters_agg,\n", + " how=\"outer\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "id": "cdae7af9-696e-42dd-8c88-ee81b4e0bfef", + "metadata": {}, + "outputs": [], + "source": [ + "final_two_minute = final_two_minute.fillna(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "id": "e8fa9d6e-3f00-4d98-b5c4-f1cde6c23972", + "metadata": {}, + "outputs": [], + "source": [ + "final_two_minute[\"pct_trips_bunched\"] = (\n", + " final_two_minute.bunched_trips / final_two_minute.all_trips * 100\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "id": "28052971-36d9-447a-89b2-ab700ee3e731", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caltrans_districtorganization_nameroute_long_nameroute_type_strshape_array_keyroute_idstop_idroute_primary_directionall_tripsbunched_tripspct_trips_bunched
001 - EurekaCity of EurekaAMRTS Gold RouteBusc47c15ffc43da6e556ff913272778e4d141262Northbound110.000.00
101 - EurekaCity of EurekaAMRTS Gold RouteBusc47c15ffc43da6e556ff913272778e4d141264Northbound110.000.00
\n", + "
" + ], + "text/plain": [ + " caltrans_district organization_name route_long_name route_type_str \\\n", + "0 01 - Eureka City of Eureka AMRTS Gold Route Bus \n", + "1 01 - Eureka City of Eureka AMRTS Gold Route Bus \n", + "\n", + " shape_array_key route_id stop_id route_primary_direction \\\n", + "0 c47c15ffc43da6e556ff913272778e4d 14 1262 Northbound \n", + "1 c47c15ffc43da6e556ff913272778e4d 14 1264 Northbound \n", + "\n", + " all_trips bunched_trips pct_trips_bunched \n", + "0 11 0.00 0.00 \n", + "1 11 0.00 0.00 " + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_two_minute.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "id": "1dff846b-30a7-4ddb-95e6-6e61ecf64241", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "113" + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_two_minute.route_id.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "id": "bb8efe64-19d5-4db2-bac1-50ee1b9102b1", + "metadata": {}, "outputs": [], "source": [ - "bunched.sort_values(by=[\"pct_trips_bunched\"], ascending=False)" + "final_two_minute2 = (\n", + " final_two_minute.sort_values(by=[\"all_trips\"], ascending=False)\n", + " .drop_duplicates(subset=transit_matters_agg)\n", + " .reset_index(drop=True)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "id": "a11e9bc2-70c6-488a-aa7e-f92d8b53c8e0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 5871.00\n", + "mean 0.26\n", + "std 2.10\n", + "min 0.00\n", + "1% 0.00\n", + "2% 0.00\n", + "5% 0.00\n", + "10% 0.00\n", + "50% 0.00\n", + "90% 0.00\n", + "95% 1.84\n", + "98% 3.70\n", + "99% 5.74\n", + "max 100.00\n", + "Name: pct_trips_bunched, dtype: float64" + ] + }, + "execution_count": 139, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"final_two_minute2.pct_trips_bunched.describe(percentiles)" + ] + }, + { + "cell_type": "markdown", + "id": "1a0b54b9-3d4c-4cc4-8243-e26a42c47e83", + "metadata": {}, + "source": [ + "### Comparing both outcomes" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "id": "15e849c8-4695-4fe9-80a7-8f326612e052", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caltrans_districtorganization_nameroute_long_nameroute_type_strshape_array_keyroute_idstop_idroute_primary_directionall_tripsbunched_tripspct_trips_bunched
7107 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus6a10ede3fa469c8b4d9bf761946ed20a204-131725637Northbound988.008.16
\n", + "
" + ], + "text/plain": [ + " caltrans_district \\\n", + "71 07 - Los Angeles \n", + "\n", + " organization_name \\\n", + "71 Los Angeles County Metropolitan Transportation Authority \n", + "\n", + " route_long_name route_type_str shape_array_key \\\n", + "71 Metro Local Line Bus 6a10ede3fa469c8b4d9bf761946ed20a \n", + "\n", + " route_id stop_id route_primary_direction all_trips bunched_trips \\\n", + "71 204-13172 5637 Northbound 98 8.00 \n", + "\n", + " pct_trips_bunched \n", + "71 8.16 " + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transit_matters_m2.loc[\n", + " (transit_matters_m2.stop_id == \"5637\")\n", + " & (\n", + " transit_matters_m2.organization_name\n", + " == \"Los Angeles County Metropolitan Transportation Authority\"\n", + " )\n", + " & (transit_matters_m2.route_id == \"204-13172\")\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "id": "b930c91d-5aca-47ab-b61d-6cf9c97b2ee2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caltrans_districtorganization_nameroute_long_nameroute_type_strshape_array_keyroute_idstop_idroute_primary_directionall_tripsbunched_tripspct_trips_bunched
6307 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus6a10ede3fa469c8b4d9bf761946ed20a204-131725637Northbound1033.002.91
\n", + "
" + ], + "text/plain": [ + " caltrans_district \\\n", + "63 07 - Los Angeles \n", + "\n", + " organization_name \\\n", + "63 Los Angeles County Metropolitan Transportation Authority \n", + "\n", + " route_long_name route_type_str shape_array_key \\\n", + "63 Metro Local Line Bus 6a10ede3fa469c8b4d9bf761946ed20a \n", + "\n", + " route_id stop_id route_primary_direction all_trips bunched_trips \\\n", + "63 204-13172 5637 Northbound 103 3.00 \n", + "\n", + " pct_trips_bunched \n", + "63 2.91 " + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_two_minute2.loc[\n", + " (final_two_minute2.stop_id == \"5637\")\n", + " & (\n", + " final_two_minute2.organization_name\n", + " == \"Los Angeles County Metropolitan Transportation Authority\"\n", + " )\n", + " & (final_two_minute2.route_id == \"204-13172\")\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "id": "168856c1-23ec-4c88-831d-c7de1c373950", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caltrans_districtorganization_nameroute_long_nameroute_type_strshape_array_keyroute_idstop_idroute_primary_directionall_tripsbunched_tripspct_trips_bunched
548604 - OaklandPeninsula Corridor Joint Powers BoardLocalRail8c4de04e7398d418c12cc1541651e951L170032Eastbound11.00100.00
548504 - OaklandPeninsula Corridor Joint Powers BoardLocalRail8c4de04e7398d418c12cc1541651e951L170022Eastbound11.00100.00
419203 - MarysvilleYolo County Transportation DistrictROUTE 215 WBBuse939d633652e2af6d3aa82d28a042dbf07959480-2a40-4a51-92ac-8ca2029d5f4fa1d203a6-cfdd-40a1-af2f-0fa502ea65b4Westbound61.0016.67
373403 - MarysvilleYolo County Transportation DistrictROUTE 215 WBBuse939d633652e2af6d3aa82d28a042dbf07959480-2a40-4a51-92ac-8ca2029d5f4fc8a9bfc8-7e84-483b-95bc-02a1494c3ae3Westbound81.0012.50
369905 - San Luis ObispoSanta Cruz Metropolitan Transit DistrictUCSC/Capitola Mall/Live OakBus9b4a79b5b21e2ca01ee032b1c39ca3c53B1388Eastbound81.0012.50
66807 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus2e0e2720bd3b6c1510ab780896264ae4206-1317214027Southbound465.0010.87
9907 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus6a10ede3fa469c8b4d9bf761946ed20a204-13172140963Northbound9510.0010.53
9507 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus6a10ede3fa469c8b4d9bf761946ed20a204-131725685Northbound969.009.38
9307 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus6a10ede3fa469c8b4d9bf761946ed20a204-13172140968Northbound969.009.38
9807 - Los AngelesLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus6a10ede3fa469c8b4d9bf761946ed20a204-131725675Northbound969.009.38
\n", + "
" + ], + "text/plain": [ + " caltrans_district \\\n", + "5486 04 - Oakland \n", + "5485 04 - Oakland \n", + "4192 03 - Marysville \n", + "3734 03 - Marysville \n", + "3699 05 - San Luis Obispo \n", + "668 07 - Los Angeles \n", + "99 07 - Los Angeles \n", + "95 07 - Los Angeles \n", + "93 07 - Los Angeles \n", + "98 07 - Los Angeles \n", + "\n", + " organization_name \\\n", + "5486 Peninsula Corridor Joint Powers Board \n", + "5485 Peninsula Corridor Joint Powers Board \n", + "4192 Yolo County Transportation District \n", + "3734 Yolo County Transportation District \n", + "3699 Santa Cruz Metropolitan Transit District \n", + "668 Los Angeles County Metropolitan Transportation Authority \n", + "99 Los Angeles County Metropolitan Transportation Authority \n", + "95 Los Angeles County Metropolitan Transportation Authority \n", + "93 Los Angeles County Metropolitan Transportation Authority \n", + "98 Los Angeles County Metropolitan Transportation Authority \n", + "\n", + " route_long_name route_type_str \\\n", + "5486 Local Rail \n", + "5485 Local Rail \n", + "4192 ROUTE 215 WB Bus \n", + "3734 ROUTE 215 WB Bus \n", + "3699 UCSC/Capitola Mall/Live Oak Bus \n", + "668 Metro Local Line Bus \n", + "99 Metro Local Line Bus \n", + "95 Metro Local Line Bus \n", + "93 Metro Local Line Bus \n", + "98 Metro Local Line Bus \n", + "\n", + " shape_array_key route_id \\\n", + "5486 8c4de04e7398d418c12cc1541651e951 L1 \n", + "5485 8c4de04e7398d418c12cc1541651e951 L1 \n", + "4192 e939d633652e2af6d3aa82d28a042dbf 07959480-2a40-4a51-92ac-8ca2029d5f4f \n", + "3734 e939d633652e2af6d3aa82d28a042dbf 07959480-2a40-4a51-92ac-8ca2029d5f4f \n", + "3699 9b4a79b5b21e2ca01ee032b1c39ca3c5 3B \n", + "668 2e0e2720bd3b6c1510ab780896264ae4 206-13172 \n", + "99 6a10ede3fa469c8b4d9bf761946ed20a 204-13172 \n", + "95 6a10ede3fa469c8b4d9bf761946ed20a 204-13172 \n", + "93 6a10ede3fa469c8b4d9bf761946ed20a 204-13172 \n", + "98 6a10ede3fa469c8b4d9bf761946ed20a 204-13172 \n", + "\n", + " stop_id 
route_primary_direction all_trips \\\n", + "5486 70032 Eastbound 1 \n", + "5485 70022 Eastbound 1 \n", + "4192 a1d203a6-cfdd-40a1-af2f-0fa502ea65b4 Westbound 6 \n", + "3734 c8a9bfc8-7e84-483b-95bc-02a1494c3ae3 Westbound 8 \n", + "3699 1388 Eastbound 8 \n", + "668 14027 Southbound 46 \n", + "99 140963 Northbound 95 \n", + "95 5685 Northbound 96 \n", + "93 140968 Northbound 96 \n", + "98 5675 Northbound 96 \n", + "\n", + " bunched_trips pct_trips_bunched \n", + "5486 1.00 100.00 \n", + "5485 1.00 100.00 \n", + "4192 1.00 16.67 \n", + "3734 1.00 12.50 \n", + "3699 1.00 12.50 \n", + "668 5.00 10.87 \n", + "99 10.00 10.53 \n", + "95 9.00 9.38 \n", + "93 9.00 9.38 \n", + "98 9.00 9.38 " + ] + }, + "execution_count": 142, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_two_minute2.sort_values(by=[\"pct_trips_bunched\"], ascending=False).head(10)" + ] + }, + { + "cell_type": "markdown", + "id": "58e2034a-b640-4e7c-ab67-70393d0920e9", + "metadata": {}, + "source": [ + "#### City of Visalia" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "id": "8bdd003a-0083-41d3-95a9-59bf617c5570", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
converted_rt_arrivalactual_arrival_lagconverted_schd_arrivalscheduled_arrival_lagrt_minsbunched_y_n
913662024-05-22 06:32:47NaT2024-05-22 06:42:00NaTNaNnot bunched
913672024-05-22 07:26:410 days 00:53:542024-05-22 07:27:000 days 00:45:0053.90not bunched
913682024-05-22 08:06:110 days 00:39:302024-05-22 08:12:000 days 00:45:0039.50not bunched
913692024-05-22 08:56:570 days 00:50:462024-05-22 08:57:000 days 00:45:0050.77not bunched
913702024-05-22 09:37:590 days 00:41:022024-05-22 09:42:000 days 00:45:0041.03not bunched
913712024-05-22 10:27:260 days 00:49:272024-05-22 10:27:000 days 00:45:0049.45not bunched
913722024-05-22 11:10:050 days 00:42:392024-05-22 11:12:000 days 00:45:0042.65not bunched
913732024-05-22 12:01:010 days 00:50:562024-05-22 11:57:000 days 00:45:0050.93not bunched
913742024-05-22 12:38:080 days 00:37:072024-05-22 12:42:000 days 00:45:0037.12not bunched
913752024-05-22 13:27:100 days 00:49:022024-05-22 13:27:000 days 00:45:0049.03not bunched
913762024-05-22 14:08:380 days 00:41:282024-05-22 14:12:000 days 00:45:0041.47not bunched
913772024-05-22 14:58:220 days 00:49:442024-05-22 14:57:000 days 00:45:0049.73not bunched
913782024-05-22 16:26:530 days 01:28:312024-05-22 16:27:000 days 01:30:0088.52not bunched
913792024-05-22 17:08:470 days 00:41:542024-05-22 15:42:00-1 days +23:15:0041.90not bunched
913802024-05-22 17:57:230 days 00:48:362024-05-22 17:57:000 days 02:15:0048.60not bunched
913812024-05-22 18:41:370 days 00:44:142024-05-22 17:12:00-1 days +23:15:0044.23not bunched
913822024-05-22 19:24:430 days 00:43:062024-05-22 19:27:000 days 02:15:0043.10not bunched
913832024-05-22 20:09:430 days 00:45:002024-05-22 18:42:00-1 days +23:15:0045.00not bunched
\n", + "
" + ], + "text/plain": [ + " converted_rt_arrival actual_arrival_lag converted_schd_arrival \\\n", + "91366 2024-05-22 06:32:47 NaT 2024-05-22 06:42:00 \n", + "91367 2024-05-22 07:26:41 0 days 00:53:54 2024-05-22 07:27:00 \n", + "91368 2024-05-22 08:06:11 0 days 00:39:30 2024-05-22 08:12:00 \n", + "91369 2024-05-22 08:56:57 0 days 00:50:46 2024-05-22 08:57:00 \n", + "91370 2024-05-22 09:37:59 0 days 00:41:02 2024-05-22 09:42:00 \n", + "91371 2024-05-22 10:27:26 0 days 00:49:27 2024-05-22 10:27:00 \n", + "91372 2024-05-22 11:10:05 0 days 00:42:39 2024-05-22 11:12:00 \n", + "91373 2024-05-22 12:01:01 0 days 00:50:56 2024-05-22 11:57:00 \n", + "91374 2024-05-22 12:38:08 0 days 00:37:07 2024-05-22 12:42:00 \n", + "91375 2024-05-22 13:27:10 0 days 00:49:02 2024-05-22 13:27:00 \n", + "91376 2024-05-22 14:08:38 0 days 00:41:28 2024-05-22 14:12:00 \n", + "91377 2024-05-22 14:58:22 0 days 00:49:44 2024-05-22 14:57:00 \n", + "91378 2024-05-22 16:26:53 0 days 01:28:31 2024-05-22 16:27:00 \n", + "91379 2024-05-22 17:08:47 0 days 00:41:54 2024-05-22 15:42:00 \n", + "91380 2024-05-22 17:57:23 0 days 00:48:36 2024-05-22 17:57:00 \n", + "91381 2024-05-22 18:41:37 0 days 00:44:14 2024-05-22 17:12:00 \n", + "91382 2024-05-22 19:24:43 0 days 00:43:06 2024-05-22 19:27:00 \n", + "91383 2024-05-22 20:09:43 0 days 00:45:00 2024-05-22 18:42:00 \n", + "\n", + " scheduled_arrival_lag rt_mins bunched_y_n \n", + "91366 NaT NaN not bunched \n", + "91367 0 days 00:45:00 53.90 not bunched \n", + "91368 0 days 00:45:00 39.50 not bunched \n", + "91369 0 days 00:45:00 50.77 not bunched \n", + "91370 0 days 00:45:00 41.03 not bunched \n", + "91371 0 days 00:45:00 49.45 not bunched \n", + "91372 0 days 00:45:00 42.65 not bunched \n", + "91373 0 days 00:45:00 50.93 not bunched \n", + "91374 0 days 00:45:00 37.12 not bunched \n", + "91375 0 days 00:45:00 49.03 not bunched \n", + "91376 0 days 00:45:00 41.47 not bunched \n", + "91377 0 days 00:45:00 49.73 not bunched \n", + "91378 0 days 01:30:00 88.52 
not bunched \n", + "91379 -1 days +23:15:00 41.90 not bunched \n", + "91380 0 days 02:15:00 48.60 not bunched \n", + "91381 -1 days +23:15:00 44.23 not bunched \n", + "91382 0 days 02:15:00 43.10 not bunched \n", + "91383 -1 days +23:15:00 45.00 not bunched " + ] + }, + "execution_count": 151, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "two_minutes_df.loc[\n", + " (two_minutes_df.stop_id == \"2307719\")\n", + " & (two_minutes_df.organization_name == \"City of Visalia\")\n", + " & (two_minutes_df.route_id == \"2042\")\n", + " & (two_minutes_df.shape_array_key == \"60da59c7000ea5dcb5f845d8fa227f14\")\n", + "][\n", + " [\n", + " \"converted_rt_arrival\",\n", + " \"actual_arrival_lag\",\n", + " \"converted_schd_arrival\",\n", + " \"scheduled_arrival_lag\",\n", + " \"rt_mins\",\n", + " \"bunched_y_n\",\n", + " ]\n", + "]" ] } ], diff --git a/rt_scheduled_v_ran/11_agency_agg.ipynb b/rt_scheduled_v_ran/11_agency_agg.ipynb new file mode 100644 index 000000000..92c005797 --- /dev/null +++ b/rt_scheduled_v_ran/11_agency_agg.ipynb @@ -0,0 +1,715 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "f586303a-9775-4b42-8f99-81290515aa7c", + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import numpy as np\n", + "import pandas as pd\n", + "from segment_speed_utils import (\n", + " gtfs_schedule_wrangling,\n", + " helpers,\n", + " metrics,\n", + " time_series_utils,\n", + ")\n", + "from segment_speed_utils.project_vars import (\n", + " COMPILED_CACHED_VIEWS,\n", + " GTFS_DATA_DICT,\n", + " PROJECT_CRS,\n", + " RT_SCHED_GCS,\n", + " SCHED_GCS,\n", + " SEGMENT_GCS,\n", + ")\n", + "from shared_utils import catalog_utils, rt_dates, rt_utils" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2a88fa23-caec-4cf7-845a-54a09fe7a81d", + "metadata": {}, + "outputs": [], + "source": [ + "pd.options.display.max_columns = 100\n", + "pd.options.display.float_format = 
\"{:.2f}\".format\n", + "pd.set_option(\"display.max_rows\", None)\n", + "pd.set_option(\"display.max_colwidth\", None)" + ] + }, + { + "cell_type": "markdown", + "id": "baf7ddd3-fd43-458a-9479-71bc9f7935db", + "metadata": {}, + "source": [ + "### Exploring" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "00b45e96-315f-4f74-af8c-74eb994057ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'gs://calitp-analytics-data/data-analyses/rt_vs_schedule/'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "RT_SCHED_GCS" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "78b859a7-7598-4719-a806-887b31a5daa9", + "metadata": {}, + "outputs": [], + "source": [ + "dict_inputs = GTFS_DATA_DICT.rt_vs_schedule_tables" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a666f731-821a-4d7f-adc6-36ab7ee1428c", + "metadata": {}, + "outputs": [], + "source": [ + "ROUTE_EXPORT = dict_inputs.vp_route_direction_metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2d7af748-ceae-4a95-88e6-e24eb788a253", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'vp_route_dir/route_direction_metrics'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ROUTE_EXPORT" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7d685df6-f33c-430b-a878-22f7ce894aa2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'vp_agency/agency_metrics'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dict_inputs.vp_agency_metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "386cab22-a872-4c9b-8eb4-970adede9c90", + "metadata": {}, + "outputs": [], + "source": [ + "analysis_date = rt_dates.DATES[\"apr2024\"]" + ] + }, + { + "cell_type": "code", + 
"execution_count": 28, + "id": "d07f3469-8630-41e5-a85f-dfc6e8dd544d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'may2022': '2022-05-04',\n", + " 'sep2022': '2022-09-14',\n", + " 'sep2022a': '2022-09-21',\n", + " 'oct2022': '2022-10-12',\n", + " 'nov2022a': '2022-11-07',\n", + " 'nov2022b': '2022-11-08',\n", + " 'nov2022c': '2022-11-09',\n", + " 'nov2022d': '2022-11-10',\n", + " 'nov2022': '2022-11-16',\n", + " 'mar2023': '2023-03-15',\n", + " 'may2023': '2023-05-17',\n", + " 'sep2023': '2023-09-13',\n", + " 'oct2023a': '2023-10-09',\n", + " 'oct2023b': '2023-10-10',\n", + " 'oct2023': '2023-10-11',\n", + " 'oct2023c': '2023-10-12',\n", + " 'oct2023d': '2023-10-13',\n", + " 'oct2023e': '2023-10-14',\n", + " 'oct2023f': '2023-10-15',\n", + " 'nov2023': '2023-11-15',\n", + " 'mar2024': '2024-03-13',\n", + " 'may2024': '2024-05-22',\n", + " 'sep2024': '2024-09-18'}" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "{k: v for k, v in rt_dates.DATES.items() if (k[:3], k[3:]) >= ('mar', '2023')}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "bb39cfdf-43a3-46e4-b200-019eb08b2de3", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_parquet(f\"{RT_SCHED_GCS}{ROUTE_EXPORT}_{analysis_date}.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "69d44f05-44b3-41f1-8d69-c5ddc7fb3dcd", + "metadata": {}, + "outputs": [], + "source": [ + "df = df.loc[df[\"time_period\"] == \"all_day\"].reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "9da30f17-f428-4dad-a889-e86b2ce605f8", + "metadata": {}, + "outputs": [], + "source": [ + "groupby_cols = [\"caltrans_district\", \"organization_name\", \"schedule_gtfs_dataset_key\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "0d8b94cc-f6ee-4e91-b8c2-3f98048e81bf", + "metadata": {}, + "outputs": [], + "source": [ + "agg1 = (\n", 
+ " df.groupby(groupby_cols)\n", + " .agg(\n", + " {\n", + " \"total_vp\": \"sum\",\n", + " \"vp_in_shape\": \"sum\",\n", + " \"total_rt_service_minutes\": \"sum\",\n", + " }\n", + " )\n", + " .reset_index()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "45a50735-5253-4434-99d2-2feb28431bd4", + "metadata": {}, + "outputs": [], + "source": [ + "agg1[\"vp_per_min_agency\"] = ((agg1.total_vp / agg1.total_rt_service_minutes)).round(2)\n", + "agg1[\"spatial_accuracy_agency\"] = ((agg1.vp_in_shape / agg1.total_vp) * 100).round(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c886f721-ae63-4b23-8100-40fafc3587d4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
32
caltrans_district04 - Oakland
organization_nameSanta Clara Valley Transportation Authority
schedule_gtfs_dataset_keyfb467982dcc77a7f9199bebe709bb700
total_vp778638
vp_in_shape739031
total_rt_service_minutes303399.17
vp_per_min_agency2.57
spatial_accuracy_agency94.91
\n", + "
" + ], + "text/plain": [ + " 32\n", + "caltrans_district 04 - Oakland\n", + "organization_name Santa Clara Valley Transportation Authority\n", + "schedule_gtfs_dataset_key fb467982dcc77a7f9199bebe709bb700\n", + "total_vp 778638\n", + "vp_in_shape 739031\n", + "total_rt_service_minutes 303399.17\n", + "vp_per_min_agency 2.57\n", + "spatial_accuracy_agency 94.91" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agg1.sample().T" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "3985f03a-bcd5-41f9-bcff-a5f3a1436603", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caltrans_districtorganization_nameschedule_gtfs_dataset_keytotal_vpvp_in_shapetotal_rt_service_minutesvp_per_min_agencyspatial_accuracy_agency
001 - EurekaCity of Eurekaa253a8d7acd57657bb98050f37dd6b0f379811800013102.612.9047.39
101 - EurekaLake Transit Authority0a3c0b21c85fb09f8db91599e14dd7f713320127725433.322.4595.89
\n", + "
" + ], + "text/plain": [ + " caltrans_district organization_name schedule_gtfs_dataset_key \\\n", + "0 01 - Eureka City of Eureka a253a8d7acd57657bb98050f37dd6b0f \n", + "1 01 - Eureka Lake Transit Authority 0a3c0b21c85fb09f8db91599e14dd7f7 \n", + "\n", + " total_vp vp_in_shape total_rt_service_minutes vp_per_min_agency \\\n", + "0 37981 18000 13102.61 2.90 \n", + "1 13320 12772 5433.32 2.45 \n", + "\n", + " spatial_accuracy_agency \n", + "0 47.39 \n", + "1 95.89 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agg1.head(2)" + ] + }, + { + "cell_type": "markdown", + "id": "b1d4b72a-e09d-41f0-bc13-c78e65bad8b0", + "metadata": {}, + "source": [ + "### Functions " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "02a30975-d9d5-4174-8a5f-47c1e80970df", + "metadata": {}, + "outputs": [], + "source": [ + "def agency_metrics(analysis_date: str, dict_inputs: dict) -> pd.DataFrame:\n", + " # start = datetime.datetime.now()\n", + "\n", + " ROUTE_EXPORT = dict_inputs.vp_route_direction_metrics\n", + " AGENCY_EXPORT = dict_inputs.vp_agency_metrics\n", + "\n", + " # Read in dataframe.\n", + " df = pd.read_parquet(f\"{RT_SCHED_GCS}{ROUTE_EXPORT}_{analysis_date}.parquet\")\n", + "\n", + " # Keep only all_day.\n", + " df = df.loc[df[\"time_period\"] == \"all_day\"].reset_index(drop=True)\n", + "\n", + " # Aggregate\n", + " groupby_cols = [\n", + " \"caltrans_district\",\n", + " \"organization_name\",\n", + " \"schedule_gtfs_dataset_key\",\n", + " ]\n", + "\n", + " sum_cols = [\"total_vp\", \"vp_in_shape\", \"total_rt_service_minutes\"]\n", + " agg1 = df.groupby(groupby_cols).agg({**{e: \"sum\" for e in sum_cols}}).reset_index()\n", + "\n", + " agg1[\"vp_per_min_agency\"] = ((agg1.total_vp / agg1.total_rt_service_minutes)).round(\n", + " 2\n", + " )\n", + " agg1[\"spatial_accuracy_agency\"] = ((agg1.vp_in_shape / agg1.total_vp) * 100).round(\n", + " 2\n", + " )\n", + "\n", + " agg1 = 
agg1.drop(columns=sum_cols)\n", + " # Save\n", + " agg1.to_parquet(f\"{RT_SCHED_GCS}{AGENCY_EXPORT}_TEST_{analysis_date}.parquet\")\n", + "\n", + " # end = datetime.datetime.now()\n", + " # logger.info(f\"agency aggregation {analysis_date}: {end - start}\")\n", + "\n", + " return agg1" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "7696d284-c8cb-4739-8131-dc873933994e", + "metadata": {}, + "outputs": [], + "source": [ + "analysis_date2 = rt_dates.DATES[\"apr2024\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "815a5ad4-2422-44ed-86c2-2bd5c4eae693", + "metadata": {}, + "outputs": [], + "source": [ + "dict_inputs = GTFS_DATA_DICT.rt_vs_schedule_tables" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "85a4d3ea-cf58-44e7-901f-59414416e092", + "metadata": {}, + "outputs": [], + "source": [ + "apr_df = agency_metrics(\n", + " analysis_date2,\n", + " dict_inputs,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "0b62649f-15b6-459d-ab1d-b2627521abfe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8153321332404478" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "703396 / 862711" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "82ce25f9-3088-4d10-8427-f0fdf4f8c05f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
12
caltrans_district04 - Oakland
organization_nameAlameda-Contra Costa Transit District
schedule_gtfs_dataset_keyc499f905e33929a641f083dad55c521e
vp_per_min_agency2.02
spatial_accuracy_agency81.53
\n", + "
" + ], + "text/plain": [ + " 12\n", + "caltrans_district 04 - Oakland\n", + "organization_name Alameda-Contra Costa Transit District\n", + "schedule_gtfs_dataset_key c499f905e33929a641f083dad55c521e\n", + "vp_per_min_agency 2.02\n", + "spatial_accuracy_agency 81.53" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "apr_df.loc[apr_df.organization_name == \"Alameda-Contra Costa Transit District\"].T" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "d006206a-48e0-49b4-ba68-cd279fd7f0dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
34
caltrans_district04 - Oakland
organization_nameSonoma-Marin Area Rail Transit District
schedule_gtfs_dataset_key0881af3822466784992a49f1cc57d38f
vp_per_min_agency3.01
spatial_accuracy_agency99.61
\n", + "
" + ], + "text/plain": [ + " 34\n", + "caltrans_district 04 - Oakland\n", + "organization_name Sonoma-Marin Area Rail Transit District\n", + "schedule_gtfs_dataset_key 0881af3822466784992a49f1cc57d38f\n", + "vp_per_min_agency 3.01\n", + "spatial_accuracy_agency 99.61" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "apr_df.sample().T" + ] + }, + { + "cell_type": "markdown", + "id": "b3e76f4c-c933-490f-89bf-01369797e5b0", + "metadata": {}, + "source": [ + "### Look at the files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2038f4d6-1d3e-4331-b08f-aa2812a6b749", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/rt_scheduled_v_ran/scripts/rt_v_scheduled_agency.py b/rt_scheduled_v_ran/scripts/rt_v_scheduled_agency.py new file mode 100644 index 000000000..702578fe6 --- /dev/null +++ b/rt_scheduled_v_ran/scripts/rt_v_scheduled_agency.py @@ -0,0 +1,68 @@ +""" +Generate RT vs schedule metrics for agency-level. 
+""" +import datetime +import pandas as pd +import sys + +from loguru import logger + +from segment_speed_utils import gtfs_schedule_wrangling, metrics +from segment_speed_utils.time_series_utils import ROUTE_DIR_COLS +from update_vars import RT_SCHED_GCS, GTFS_DATA_DICT +from shared_utils import rt_dates + +def agency_metrics(analysis_date: str, dict_inputs: dict) -> pd.DataFrame: + start = datetime.datetime.now() + + ROUTE_EXPORT = dict_inputs.vp_route_direction_metrics + AGENCY_EXPORT = dict_inputs.vp_agency_metrics + + # Read in the route-direction metrics parquet for this analysis date. + df = pd.read_parquet(f"{RT_SCHED_GCS}{ROUTE_EXPORT}_{analysis_date}.parquet") + + # Keep only the all_day rows. + df = df.loc[df["time_period"] == "all_day"].reset_index(drop=True) + + # Aggregate: sum the raw counts per district / organization / schedule feed key. + groupby_cols = [ + "caltrans_district", + "organization_name", + "schedule_gtfs_dataset_key", + ] + + sum_cols = ["total_vp", "vp_in_shape", "total_rt_service_minutes"] + agg1 = df.groupby(groupby_cols).agg({**{e: "sum" for e in sum_cols}}).reset_index() + + agg1["vp_per_min_agency"] = ((agg1.total_vp / agg1.total_rt_service_minutes)).round( + 2 + ) + agg1["spatial_accuracy_agency"] = ((agg1.vp_in_shape / agg1.total_vp) * 100).round( + 2 + ) + + # Clean up: drop the raw sum columns now that the two ratios are computed. + agg1 = agg1.drop(columns=sum_cols) + + # Save the agency-level table next to the route-level export (note the _TEST_ infix). + agg1.to_parquet(f"{RT_SCHED_GCS}{AGENCY_EXPORT}_TEST_{analysis_date}.parquet") + + end = datetime.datetime.now() + logger.info(f"agency aggregation {analysis_date}: {end - start}") + + return agg1 + +if __name__ == "__main__": + + LOG_FILE = "../logs/rt_v_scheduled_agency_metrics.log" + logger.add(LOG_FILE, retention="3 months") + logger.add(sys.stderr, + format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", + level="INFO") + + from update_vars import analysis_date_list + + dict_inputs = GTFS_DATA_DICT.rt_vs_schedule_tables + + for analysis_date in analysis_date_list: + agency_metrics(analysis_date, dict_inputs) \ No newline at end of file diff --git a/rt_scheduled_v_ran/scripts/rt_v_scheduled_routes.py
b/rt_scheduled_v_ran/scripts/rt_v_scheduled_routes.py index 7c5bfef9b..b60ed484a 100644 --- a/rt_scheduled_v_ran/scripts/rt_v_scheduled_routes.py +++ b/rt_scheduled_v_ran/scripts/rt_v_scheduled_routes.py @@ -60,6 +60,7 @@ def route_metrics( "organization_source_record_id", "organization_name", "caltrans_district",] + route_df = metrics.concatenate_peak_offpeak_allday_averages( trip_df, group_cols = ["schedule_gtfs_dataset_key"] + ROUTE_DIR_COLS, diff --git a/rt_scheduled_v_ran/scripts/update_vars.py b/rt_scheduled_v_ran/scripts/update_vars.py index c29e2d583..c91feb387 100644 --- a/rt_scheduled_v_ran/scripts/update_vars.py +++ b/rt_scheduled_v_ran/scripts/update_vars.py @@ -5,8 +5,8 @@ apr2023_week = rt_dates.get_week("apr2023", exclude_wed=True) apr2024_week = rt_dates.get_week("apr2024", exclude_wed=True) -analysis_date_list = [rt_dates.DATES["sep2024"]] - +# analysis_date_list = [rt_dates.DATES["sep2024"]] +analysis_date_list = [v for v in rt_dates.DATES.values() if v >= "2023-03-01"]  # ISO "YYYY-MM-DD" strings from March 2023 onward; compared on the date value, not the key name GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data") From 26fdb408f3403a84038fbe0da90edb81163a72db Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Wed, 2 Oct 2024 20:39:07 +0000 Subject: [PATCH 3/6] fixing some weird github thing --- portfolio/gtfs_digest_testing/README.md | 40 ----------------- portfolio/gtfs_digest_testing/_config.yml | 43 ------------------- portfolio/gtfs_digest_testing/_toc.yml | 8 ---- .../district_04-oakland.md | 1 - 4 files changed, 92 deletions(-) delete mode 100644 portfolio/gtfs_digest_testing/README.md delete mode 100644 portfolio/gtfs_digest_testing/_config.yml delete mode 100644 portfolio/gtfs_digest_testing/_toc.yml delete mode 100644 portfolio/gtfs_digest_testing/district_04-oakland.md diff --git a/portfolio/gtfs_digest_testing/README.md b/portfolio/gtfs_digest_testing/README.md deleted file mode 100644 index 274b6f533..000000000 --- a/portfolio/gtfs_digest_testing/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# General
Transit Feed Specification (GTFS) Digest -The goal of this website is to give you an overview of transit operators that produce GTFS schedule and/or real-time data. We use data from the [National Transit Database](https://www.transit.dot.gov/ntd), [National Association of City Transportation Officials's Transit Route Types](https://nacto.org/publication/transit-street-design-guide/introduction/service-context/transit-route-types/), and [GTFS feeds](https://gtfs.org/) to deliver key insights. You can find details such as the types of routes and the total scheduled hours of public transit service for which an operator runs. - -For operators who produce real-time data, we also calculate additional performance metrics for all of their routes. Examples include displaying the number of on-time, early, and late trips, the average speed, and the headway for a route. - -GTFS Digest will continue to evolve as we dive into our own data warehouse! - -## Definitions and Methodology -To read about the methodology behind and the definitions of terms used throughout our work, please visit [here](https://github.com/cal-itp/data-analyses/blob/main/gtfs_digest/methodology.md). - -## Frequently Asked Questions -**Why are the time-series tables sampling single days?** -GTFS provides us with extremely detailed information, such as the time a bus is scheduled to arrive at a stop, and the GPS coordinates of a bus at a given timestamp. When working with granular data like this, a single day statewide can be a very large table. - -For context, on our sampled date in January 2024 there were 100k+ trips and 3.6 million+ stop arrivals, and that's just scheduled data. Our vehicle positions table genereated in real-time, after deduplicating in our warehouse, had 15 million+ rows. On top of that, each operator can have a quartet of GTFS data (1 schedule table + 3 real-time tables). - -Getting our pipeline right is fairly complex for a single day. 
Our warehouse has a set of internal keys to ensure we're matching trip for trip across quartets. If you factor in the fact that operators can update their GTFS feeds at any time in the month, there are a lot of things that are changing! - -We do have monthly aggregations on our roadmap, but for now, we're building out our own time-series tables of processed data, and working through the kinks of being able to track the same route over time (as feeds get updated, identifiers change, etc). We will be starting with schedule data to figure out how to produce monthly aggregations in a scalable way. - -**How does GTFS Digest fit into SB 125 performance metrics?** - -[SB 125](https://calsta.ca.gov/subject-areas/sb125-transit-program) and the creation of the Transit Transformation Task Force has a section on creating performance metrics for transit operators statewide. Dive into the [legislative bill](https://legiscan.com/CA/text/SB125/id/2831757). - -The Caltrans Division of Data & Digital Services has been ingesting and collecting GTFS data in our warehouse since 2021. Our own internal effort has been to create data pipelines so that the rich and comprehensive data we collect can be processed and made available for public consumption. - -There are overlaps with the goals of SB 125. There are a set of performance metrics that could be of interest to the task force, the public, and us! However, GTFS Digest is a **GTFS** digest, which means its primary focus is on metrics that can be derived purely from GTFS, and to do it statewide so we can understand transit operator performance. We based a lot of our metrics on the papers by [Professor Gregory Newmark](https://www.morgan.edu/sap/gregory-newmark) that gave us a roadmap of metrics that could be derived solely from GTFS that would create comparisons of transit operators regardless of size, service area and density. - -## Data Sources -The GTFS Digest is comprised of four major datasets. 
The processing of the datasets is detailed below. -[![mermaid_diagram1](https://mermaid.ink/img/pako:eNqlVmtv4jgU_StWVhVUCxQooSUjrTSQgf0w3UdhdqUto8pNnMQaY0e2M21a9b_vtU2ApLAzu_sFEvvch-8598YvXiRi4gVet9tdc001IwFarOZLFNKUKI3W3O6cnb2sOUKUUx0g-4hQS2dkQ1oBaj1gRVqdw9U_sKT4gRHV2sFhK5d0g2U5E0xIY_fDB3_uz99XpnvEijzpParf77-FTIWMiTwFYpSTU3uKRILH9Tzm86sP0wOMJlLTGiRJkpbbfjV_8PN6drbma54w8RhlWGr08dYBVPGQSpxnrpC_CakTwahAnzRlVJdoXvBIU8GVg88Hd6lO1H1sK35xD_mZ3cF9AXjVy8vPQRDECep2f9oaDNuhIAo9oRJhHqNnpDPKU4W0OD9AXd4tJKYcDZz9O5Os2Vgcjzc8GW_xXfEWjXiVLQAWo-3WcJeK2SPgyz016vaJA7VKQywIA8EIsuYKPRAIHLuVvUgr1hCKGFYqJAmC9BPKWLBV2BGEiiTNdYWy_B9BOWIdaD6bjvof9qD3g7tfcyKxFtKWZUnkVxoRFGJN3Dnh6GiPtzbD9mHtN0Sm5F5svdzHWGOo_zkYb9M74uGyvdQ4-qJsFYyFIlohkaCdm1wKyBgIq1uaHCMplHrEDMwfqc7QL6vQ-kAJHKIBZ8yGUNtjAYwYyoGESGwI0nRjNyms2sQb9jfmbC7L4RuwzVmLlMC27J3vBNc87LjdXgmNGboVhQn_UKJVmUNTpuW5sbL8vNsbTQ846dYYgVfr4_OpWNP_Tc30ODU6wxrByNEg4bqBgUmTFFIZzuF4hgWzWIfVGTA02t7AMqbPsOA8cLwhqtOwBKpwoYjjEQAO21IWjSLMjcB5SpD4SqQlqXfQ0btzjdrtj4Lb78GPaJnBQDOPtpx7Fpq1sAWa-u32Dc6NPC0cLYiwPQ5COErgbE9g909CvlwssS5kjMuLZQFHLrs_i0Ke5HD2Txx-i77ZZXsO9VdRRuKCERgzAISSwrBT1bhQjl_ojbrt0U5x6jYtE0liJIgP-6DZL6uM8G2ww26BV5ymkqTgIDb6hyIgXeaks2v4zpFGz6BMJjLkAH-mg_bnqvI0GHXQfI1yAOshpqxEy51l1VB1pKHEadcwBvmdEsXMiWLm_3vPlQy-5Xr8H1xbYR1VY3igRqdflcMn3NAQUum-m923c_-IuMJjyvyeoRLWhsqbYeyU1eD_UECd08OibpVQjpnFmECRMYAFFFfnBB1urVPCoSaMlXUHGcGxyekRTCFsTOAStYF7WBXUiraRaZ4DVbuP0ukPQQhi_L3A9vq0myWGvFtqrgqnVBE6VYR-3ZpgZivUAJvvYCQYg_OCbIz37YnfaANuLtvblPmr7jDr3XjfO3ar7t3reMD7BtMYLtz2Vrz27G157QXwGJMEF0yvPbhaAhQXWixLHnmBlgXpeJBJmnlBgpmCtyI3Eggphmm6qSAkpqDUG3ejtxf7jpdj7gUv3pMXXPV7o7E_Gk6Gw_7A9y87XukFg-Gkdzm4Glz7o8l4MvCvXjvesxDgctC7nkyu-0N_7PtXo_FobH39ZfdMuNe_AaRWsro?type=png)](https://mermaid.live/edit#pako:eNqlVmtv4jgU_StWVhVUCxQooSUjrTSQgf0w3UdhdqUto8pNnMQaY0e2M21a9b_vtU2ApLAzu_sFEvvch-8598YvXiRi4gVet9tdc001IwFarOZLFNKUKI3W3O6cnb2sOUKUUx0g-4hQS2dkQ1oBaj1gRVqdw9U_sKT4gRHV2sFhK5d0g2U5E0xIY_fDB3_uz99XpnvEijzpParf77-FTIWMiTwFYpSTU3uKRILH9Tzm86sP0wOMJlLTGiRJkpbbfjV_8PN6drbma54w8RhlW
Gr08dYBVPGQSpxnrpC_CakTwahAnzRlVJdoXvBIU8GVg88Hd6lO1H1sK35xD_mZ3cF9AXjVy8vPQRDECep2f9oaDNuhIAo9oRJhHqNnpDPKU4W0OD9AXd4tJKYcDZz9O5Os2Vgcjzc8GW_xXfEWjXiVLQAWo-3WcJeK2SPgyz016vaJA7VKQywIA8EIsuYKPRAIHLuVvUgr1hCKGFYqJAmC9BPKWLBV2BGEiiTNdYWy_B9BOWIdaD6bjvof9qD3g7tfcyKxFtKWZUnkVxoRFGJN3Dnh6GiPtzbD9mHtN0Sm5F5svdzHWGOo_zkYb9M74uGyvdQ4-qJsFYyFIlohkaCdm1wKyBgIq1uaHCMplHrEDMwfqc7QL6vQ-kAJHKIBZ8yGUNtjAYwYyoGESGwI0nRjNyms2sQb9jfmbC7L4RuwzVmLlMC27J3vBNc87LjdXgmNGboVhQn_UKJVmUNTpuW5sbL8vNsbTQ846dYYgVfr4_OpWNP_Tc30ODU6wxrByNEg4bqBgUmTFFIZzuF4hgWzWIfVGTA02t7AMqbPsOA8cLwhqtOwBKpwoYjjEQAO21IWjSLMjcB5SpD4SqQlqXfQ0btzjdrtj4Lb78GPaJnBQDOPtpx7Fpq1sAWa-u32Dc6NPC0cLYiwPQ5COErgbE9g909CvlwssS5kjMuLZQFHLrs_i0Ke5HD2Txx-i77ZZXsO9VdRRuKCERgzAISSwrBT1bhQjl_ojbrt0U5x6jYtE0liJIgP-6DZL6uM8G2ww26BV5ymkqTgIDb6hyIgXeaks2v4zpFGz6BMJjLkAH-mg_bnqvI0GHXQfI1yAOshpqxEy51l1VB1pKHEadcwBvmdEsXMiWLm_3vPlQy-5Xr8H1xbYR1VY3igRqdflcMn3NAQUum-m923c_-IuMJjyvyeoRLWhsqbYeyU1eD_UECd08OibpVQjpnFmECRMYAFFFfnBB1urVPCoSaMlXUHGcGxyekRTCFsTOAStYF7WBXUiraRaZ4DVbuP0ukPQQhi_L3A9vq0myWGvFtqrgqnVBE6VYR-3ZpgZivUAJvvYCQYg_OCbIz37YnfaANuLtvblPmr7jDr3XjfO3ar7t3reMD7BtMYLtz2Vrz27G157QXwGJMEF0yvPbhaAhQXWixLHnmBlgXpeJBJmnlBgpmCtyI3Eggphmm6qSAkpqDUG3ejtxf7jpdj7gUv3pMXXPV7o7E_Gk6Gw_7A9y87XukFg-Gkdzm4Glz7o8l4MvCvXjvesxDgctC7nkyu-0N_7PtXo_FobH39ZfdMuNe_AaRWsro) - -To download all of the processed data that powers this portfolio, please navigate to the folder titled `gtfs_digest` [here](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis). You will find the most recent datasets in `.parquet, .csv,.geojson` formats. Match the [readable column names](https://github.com/cal-itp/data-analyses/blob/main/gtfs_digest/readable.yml) to the table names. The data pulled from the Federal Transit Administration's National Transit Data is located [here](https://www.transit.dot.gov/ntd/data-product/2022-annual-database-agency-information). 
- -## Who We Are -This website was created by the [California Department of Transportation](https://dot.ca.gov/)'s Division of Data and Digital Services. We are a group of data analysts and scientists who analyze transportation data, such as General Transit Feed Specification (GTFS) data, or data from funding programs such as the Active Transportation Program. Our goal is to transform messy and indecipherable original datasets into usable, customer-friendly products to better the transportation landscape. For more of our work, visit our [portfolio](https://analysis.calitp.org/). - -Alt text Alt text - -
Caltrans®, the California Department of Transportation® and the Caltrans logo are registered service marks of the California Department of Transportation and may not be copied, distributed, displayed, reproduced or transmitted in any form without prior written permission from the California Department of Transportation. \ No newline at end of file diff --git a/portfolio/gtfs_digest_testing/_config.yml b/portfolio/gtfs_digest_testing/_config.yml deleted file mode 100644 index 7225f238b..000000000 --- a/portfolio/gtfs_digest_testing/_config.yml +++ /dev/null @@ -1,43 +0,0 @@ -# Book settings -# Learn more at https://jupyterbook.org/customize/config.html - -title: GTFS Digest -author: Cal-ITP -copyright: "2024" -#logo: calitp_logo_MAIN.png - -# Force re-execution of notebooks on each build. -# See https://jupyterbook.org/content/execute.html -execute: - execute_notebooks: 'off' - allow_errors: false - timeout: -1 - -# Define the name of the latex output file for PDF builds -latex: - latex_documents: - targetname: book.tex - -launch_buttons: - binderhub_url: "https://mybinder.org" - jupyterhub_url: "https://hubtest.k8s.calitp.jarv.us" - thebe: true - -repository: - url: https://github.com/cal-itp/data-analyses/ # Online location of your book -# path_to_book: docs # Optional path to your book, relative to the repository root - path_to_book: gtfs_digest - branch: main # Which branch of the repository should be used when creating links (optional) - -# Add GitHub buttons to your book -# See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository -html: - use_issues_button: true - use_repository_button: true - use_edit_page_button: true - google_analytics_id: 'G-JCX3Z8JZJC' - -sphinx: - config: - html_js_files: - - https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js \ No newline at end of file diff --git a/portfolio/gtfs_digest_testing/_toc.yml b/portfolio/gtfs_digest_testing/_toc.yml deleted file mode 100644 index 3ca79d99c..000000000 
--- a/portfolio/gtfs_digest_testing/_toc.yml +++ /dev/null @@ -1,8 +0,0 @@ -format: jb-book -parts: -- caption: null - chapters: - - file: district_04-oakland.md - sections: - - glob: district_04-oakland/* -root: README diff --git a/portfolio/gtfs_digest_testing/district_04-oakland.md b/portfolio/gtfs_digest_testing/district_04-oakland.md deleted file mode 100644 index 1faedbdab..000000000 --- a/portfolio/gtfs_digest_testing/district_04-oakland.md +++ /dev/null @@ -1 +0,0 @@ -# District 04 - Oakland \ No newline at end of file From 936dd4d8207373376f1838b15a42b68502d639fe Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Wed, 2 Oct 2024 21:56:21 +0000 Subject: [PATCH 4/6] figuring out why merge_data segment speed portion wont run --- gtfs_digest/34_segment_speeds.ipynb | 489 ++++++++++++++++++++++++++++ 1 file changed, 489 insertions(+) create mode 100644 gtfs_digest/34_segment_speeds.ipynb diff --git a/gtfs_digest/34_segment_speeds.ipynb b/gtfs_digest/34_segment_speeds.ipynb new file mode 100644 index 000000000..64528b780 --- /dev/null +++ b/gtfs_digest/34_segment_speeds.ipynb @@ -0,0 +1,489 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "9fa6f467-d909-4d5b-aee9-3e5d4d1827de", + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import numpy as np\n", + "import pandas as pd\n", + "from segment_speed_utils import gtfs_schedule_wrangling, helpers, time_series_utils\n", + "from segment_speed_utils.project_vars import (\n", + " COMPILED_CACHED_VIEWS,\n", + " GTFS_DATA_DICT,\n", + " PROJECT_CRS,\n", + " RT_SCHED_GCS,\n", + " SCHED_GCS,\n", + " SEGMENT_GCS,\n", + ")\n", + "from shared_utils import catalog_utils, rt_dates, rt_utils" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "86f27f73-9d60-4035-b149-f627e75efbad", + "metadata": {}, + "outputs": [], + "source": [ + "pd.options.display.max_columns = 100\n", + "pd.options.display.float_format = \"{:.2f}\".format\n", + 
"pd.set_option(\"display.max_rows\", None)\n", + "pd.set_option(\"display.max_colwidth\", None)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5163beae-c2b3-40f2-ba4d-b18fe927f4b8", + "metadata": {}, + "outputs": [], + "source": [ + "import merge_data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6c6f619d-a493-4ccf-9312-a37f95bd0a67", + "metadata": {}, + "outputs": [], + "source": [ + "DIGEST_SEGMENT_SPEEDS = GTFS_DATA_DICT.digest_tables.route_segment_speeds" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9b0e8913-25b7-45ad-9db3-0dcd322fb1a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'gs://calitp-analytics-data/data-analyses/rt_vs_schedule/'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "RT_SCHED_GCS" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c6568442-a443-4019-8e7e-b49da214ccc0", + "metadata": {}, + "outputs": [], + "source": [ + "analysis_date_list = (\n", + " rt_dates.y2024_dates + rt_dates.y2023_dates\n", + " )\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "888e5716-0e6a-4553-bc18-02a19174564c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'digest/segment_speeds'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DIGEST_SEGMENT_SPEEDS" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a91a43a4-4173-4a57-9a57-134a2c66f8cf", + "metadata": {}, + "outputs": [], + "source": [ + "df_sched = merge_data.concatenate_schedule_by_route_direction(analysis_date_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e9e5f2d1-f140-4de0-8c92-cb53ed3bb632", + "metadata": {}, + "outputs": [], + "source": [ + "primary_typology = merge_data.set_primary_typology(df_sched)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": 
"9610d511-b9ed-4fd6-839a-863246e1c979", + "metadata": {}, + "outputs": [], + "source": [ + "segment_speeds23 = gpd.read_parquet(\"2023_seg_speeds.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "a807d1b3-0eea-4ea2-aef3-6baf6a92fbff", + "metadata": {}, + "outputs": [], + "source": [ + "segment_speeds24 = gpd.read_parquet(\"2024_seg_speeds.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "be3a74e0-cadf-46f8-8d83-25c56344709c", + "metadata": {}, + "outputs": [], + "source": [ + "segment_speeds = pd.concat([segment_speeds23,segment_speeds24])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "1b1200d1-d52a-44cb-9341-98d1b321c1af", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4933047, 15)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "segment_speeds.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "35b4b90a-45d0-42a7-88ee-7f414f4f2c0c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(27868, 5)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "primary_typology.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "0c554a53-4031-4812-ac1e-2e28a0d6fd24", + "metadata": {}, + "outputs": [], + "source": [ + "segment_speeds2 = pd.merge(\n", + " segment_speeds,\n", + " primary_typology,\n", + " on = merge_data.route_time_cols,\n", + " how = \"left\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "2e3f9c09-5de5-4b80-9b9e-ec12d68ea7fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4933047, 16)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "segment_speeds2.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": 
"6c112e57-c933-4d9d-a129-d0542ff19452", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "geopandas.geodataframe.GeoDataFrame" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(segment_speeds2)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "3bc19cdf-77b0-4922-a513-cf39fad365e5", + "metadata": {}, + "outputs": [], + "source": [ + "from shared_utils import gtfs_utils_v2, publish_utils" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "d298c061-eeb5-4114-9dbf-7d627697c40e", + "metadata": {}, + "outputs": [], + "source": [ + "public_feeds = gtfs_utils_v2.filter_to_public_schedule_gtfs_dataset_keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "c68b9b37-79ae-490b-8d0f-de890df6f312", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[0;31mSignature:\u001b[0m\n", + "\u001b[0mpublish_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexclude_private_datasets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mframe\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'schedule_gtfs_dataset_key'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mpublic_gtfs_dataset_keys\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mlist\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m 
\u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mframe\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mSource:\u001b[0m \n", + "\u001b[0;32mdef\u001b[0m \u001b[0mexclude_private_datasets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"schedule_gtfs_dataset_key\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mpublic_gtfs_dataset_keys\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mlist\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0;34m\"\"\"\u001b[0m\n", + "\u001b[0;34m Filter out private datasets.\u001b[0m\n", + "\u001b[0;34m \"\"\"\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpublic_gtfs_dataset_keys\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFile:\u001b[0m 
~/data-analyses/_shared_utils/shared_utils/publish_utils.py\n", + "\u001b[0;31mType:\u001b[0m function" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + " publish_utils.exclude_private_datasets??" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03d1cf20-9181-41d9-ba21-f35343bcadda", + "metadata": {}, + "outputs": [], + "source": [ + "segment_speeds2.pipe(\n", + " publish_utils.exclude_private_datasets, \n", + " public_gtfs_dataset_keys= public_feeds)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4188fb04-e946-4e10-b592-059d402902bd", + "metadata": {}, + "outputs": [], + "source": [ + "# segment_speeds2.to_parquet(\"segment_speeds2.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4def6c7c-fc2a-4c32-b114-9ee54ce8395c", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_parquet(f\"{RT_SCHED_GCS}{DIGEST_SEGMENT_SPEEDS}.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84059c45-cf00-4456-9c9d-86cdcfda4092", + "metadata": {}, + "outputs": [], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78747ff9-fe4b-4baf-92b2-62a7ee497cc7", + "metadata": {}, + "outputs": [], + "source": [ + "primary_typology.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6cd9ff56-acf3-4375-995f-54bb877f178c", + "metadata": {}, + "outputs": [], + "source": [ + "dates_2024 = rt_dates.y2024_dates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a4d715e-388e-41da-818f-a63825f839b4", + "metadata": {}, + "outputs": [], + "source": [ + "segment_speeds_2024 = merge_data.concatenate_segment_speeds_by_route_direction(\n", + " dates_2024\n", + " ).pipe(\n", + " merge_data.merge_in_standardized_route_names, \n", + " ).astype({\"direction_id\": \"int64\"}) #Int64 doesn't work for gdf" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "602fda64-9dac-4fc7-b291-e795cb2e0a4d", + "metadata": {}, + "outputs": [], + "source": [ + "segment_speeds_2024.to_parquet(f\"2024_seg_speeds.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d801095-e21f-4634-9c8e-fc3cfed52445", + "metadata": {}, + "outputs": [], + "source": [ + "dates_2023 = rt_dates.y2023_dates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32e8f527-7b59-48c9-8b37-bcd1fbc798e0", + "metadata": {}, + "outputs": [], + "source": [ + "segment_speeds_2023 = merge_data.concatenate_segment_speeds_by_route_direction(\n", + " dates_2023\n", + " ).pipe(\n", + " merge_data.merge_in_standardized_route_names, \n", + " ).astype({\"direction_id\": \"int64\"}) #Int64 doesn't work for gdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9db987d9-ccf3-4c06-81f1-fd48e0c1fcae", + "metadata": {}, + "outputs": [], + "source": [ + "segment_speeds_2023.to_parquet(f\"2023_seg_speeds.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "782bff8b-0c54-4e32-bfd8-3305186bec21", + "metadata": {}, + "outputs": [], + "source": [ + "segment_speeds = pd.concat([segment_speeds_2024, segment_speeds_2023])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f91dfd47-127f-4603-a3e2-b11252b491c5", + "metadata": {}, + "outputs": [], + "source": [ + "segment_speeds.to_parquet(f\"all_seg_speeds.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a938545a-b3b6-472c-aba8-218c31675bb4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 6de144a05e3bf2dff7de8d5ae2372e892193045a Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Thu, 3 Oct 2024 20:11:52 +0000 Subject: [PATCH 5/6] testing my script for 2024 dates --- rt_scheduled_v_ran/11_agency_agg.ipynb | 10 ++++++++++ rt_scheduled_v_ran/scripts/update_vars.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/rt_scheduled_v_ran/11_agency_agg.ipynb b/rt_scheduled_v_ran/11_agency_agg.ipynb index 92c005797..2b6585087 100644 --- a/rt_scheduled_v_ran/11_agency_agg.ipynb +++ b/rt_scheduled_v_ran/11_agency_agg.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "91553ad7-3a0e-4890-9807-522164c553f5", + "metadata": {}, + "source": [ + "## Agency Grain Metrics\n", + "* Add it to the pipeline in `rt_scheduled_v_ran/scripts/rt_v_scheduled_agency.py`\n", + "* `cd data-analyses/rt_segment_speeds && pip install -r requirements.txt && cd ../_shared_utils && make setup_env && cd ../gtfs_digest`" + ] + }, { "cell_type": "code", "execution_count": 1, diff --git a/rt_scheduled_v_ran/scripts/update_vars.py b/rt_scheduled_v_ran/scripts/update_vars.py index c91feb387..455bbd82b 100644 --- a/rt_scheduled_v_ran/scripts/update_vars.py +++ b/rt_scheduled_v_ran/scripts/update_vars.py @@ -6,7 +6,7 @@ apr2024_week = rt_dates.get_week("apr2024", exclude_wed=True) # analysis_date_list = [rt_dates.DATES["sep2024"]] -analysis_date_list = {k: v for k, v in your_dict.items() if (k[:3], k[3:]) >= ('mar', '2023')} +analysis_date_list = rt_dates.y2024_dates GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data") From 01d84abb4a4ad9414f8749298146c040a6c32bc6 Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Fri, 4 Oct 2024 00:20:30 +0000 Subject: [PATCH 6/6] added agency metrics to makefile and concat func --- rt_scheduled_v_ran/11_agency_agg.ipynb | 394 +++++++++++++++--- rt_scheduled_v_ran/scripts/Makefile | 2 +- .../scripts/rt_v_scheduled_agency.py | 2 +- 3 files changed, 340 
insertions(+), 58 deletions(-) diff --git a/rt_scheduled_v_ran/11_agency_agg.ipynb b/rt_scheduled_v_ran/11_agency_agg.ipynb index 2b6585087..124f42ae8 100644 --- a/rt_scheduled_v_ran/11_agency_agg.ipynb +++ b/rt_scheduled_v_ran/11_agency_agg.ipynb @@ -122,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 33, "id": "7d685df6-f33c-430b-a878-22f7ce894aa2", "metadata": {}, "outputs": [ @@ -132,13 +132,13 @@ "'vp_agency/agency_metrics'" ] }, - "execution_count": 7, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "dict_inputs.vp_agency_metrics" + "GTFS_DATA_DICT.rt_vs_schedule_tables.vp_agency_metrics" ] }, { @@ -153,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 9, "id": "d07f3469-8630-41e5-a85f-dfc6e8dd544d", "metadata": {}, "outputs": [ @@ -185,7 +185,7 @@ " 'sep2024': '2024-09-18'}" ] }, - "execution_count": 28, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -196,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "bb39cfdf-43a3-46e4-b200-019eb08b2de3", "metadata": {}, "outputs": [], @@ -206,7 +206,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "69d44f05-44b3-41f1-8d69-c5ddc7fb3dcd", "metadata": {}, "outputs": [], @@ -216,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "9da30f17-f428-4dad-a889-e86b2ce605f8", "metadata": {}, "outputs": [], @@ -226,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "0d8b94cc-f6ee-4e91-b8c2-3f98048e81bf", "metadata": {}, "outputs": [], @@ -246,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "45a50735-5253-4434-99d2-2feb28431bd4", "metadata": {}, "outputs": [], @@ -257,7 +257,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": 
"c886f721-ae63-4b23-8100-40fafc3587d4", "metadata": {}, "outputs": [ @@ -282,59 +282,59 @@ " \n", " \n", " \n", - " 32\n", + " 3\n", " \n", " \n", " \n", " \n", " caltrans_district\n", - " 04 - Oakland\n", + " 01 - Eureka\n", " \n", " \n", " organization_name\n", - " Santa Clara Valley Transportation Authority\n", + " Redwood Coast Transit Authority\n", " \n", " \n", " schedule_gtfs_dataset_key\n", - " fb467982dcc77a7f9199bebe709bb700\n", + " 090b30e4249a7ec2b4c6a0923ed2f953\n", " \n", " \n", " total_vp\n", - " 778638\n", + " 7047\n", " \n", " \n", " vp_in_shape\n", - " 739031\n", + " 4746\n", " \n", " \n", " total_rt_service_minutes\n", - " 303399.17\n", + " 2480.40\n", " \n", " \n", " vp_per_min_agency\n", - " 2.57\n", + " 2.84\n", " \n", " \n", " spatial_accuracy_agency\n", - " 94.91\n", + " 67.35\n", " \n", " \n", "\n", "" ], "text/plain": [ - " 32\n", - "caltrans_district 04 - Oakland\n", - "organization_name Santa Clara Valley Transportation Authority\n", - "schedule_gtfs_dataset_key fb467982dcc77a7f9199bebe709bb700\n", - "total_vp 778638\n", - "vp_in_shape 739031\n", - "total_rt_service_minutes 303399.17\n", - "vp_per_min_agency 2.57\n", - "spatial_accuracy_agency 94.91" + " 3\n", + "caltrans_district 01 - Eureka\n", + "organization_name Redwood Coast Transit Authority\n", + "schedule_gtfs_dataset_key 090b30e4249a7ec2b4c6a0923ed2f953\n", + "total_vp 7047\n", + "vp_in_shape 4746\n", + "total_rt_service_minutes 2480.40\n", + "vp_per_min_agency 2.84\n", + "spatial_accuracy_agency 67.35" ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -345,7 +345,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "3985f03a-bcd5-41f9-bcff-a5f3a1436603", "metadata": {}, "outputs": [ @@ -421,7 +421,7 @@ "1 95.89 " ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -440,7 +440,7 @@ }, { "cell_type": "code", - "execution_count": 16, 
+ "execution_count": 17, "id": "02a30975-d9d5-4174-8a5f-47c1e80970df", "metadata": {}, "outputs": [], @@ -486,7 +486,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "id": "7696d284-c8cb-4739-8131-dc873933994e", "metadata": {}, "outputs": [], @@ -496,7 +496,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": "815a5ad4-2422-44ed-86c2-2bd5c4eae693", "metadata": {}, "outputs": [], @@ -506,7 +506,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "85a4d3ea-cf58-44e7-901f-59414416e092", "metadata": {}, "outputs": [], @@ -519,7 +519,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "id": "0b62649f-15b6-459d-ab1d-b2627521abfe", "metadata": {}, "outputs": [ @@ -529,7 +529,7 @@ "0.8153321332404478" ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -540,7 +540,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "id": "82ce25f9-3088-4d10-8427-f0fdf4f8c05f", "metadata": {}, "outputs": [ @@ -602,7 +602,7 @@ "spatial_accuracy_agency 81.53" ] }, - "execution_count": 21, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -613,7 +613,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "id": "d006206a-48e0-49b4-ba68-cd279fd7f0dc", "metadata": {}, "outputs": [ @@ -638,44 +638,44 @@ " \n", " \n", " \n", - " 34\n", + " 42\n", " \n", " \n", " \n", " \n", " caltrans_district\n", - " 04 - Oakland\n", + " 05 - San Luis Obispo\n", " \n", " \n", " organization_name\n", - " Sonoma-Marin Area Rail Transit District\n", + " Santa Cruz Metropolitan Transit District\n", " \n", " \n", " schedule_gtfs_dataset_key\n", - " 0881af3822466784992a49f1cc57d38f\n", + " 43d8d305ee692724a532f30ea63a1cbe\n", " \n", " \n", " vp_per_min_agency\n", - " 3.01\n", + " 1.52\n", " \n", " \n", " spatial_accuracy_agency\n", - " 99.61\n", + " 
94.49\n", " \n", " \n", "\n", "" ], "text/plain": [ - " 34\n", - "caltrans_district 04 - Oakland\n", - "organization_name Sonoma-Marin Area Rail Transit District\n", - "schedule_gtfs_dataset_key 0881af3822466784992a49f1cc57d38f\n", - "vp_per_min_agency 3.01\n", - "spatial_accuracy_agency 99.61" + " 42\n", + "caltrans_district 05 - San Luis Obispo\n", + "organization_name Santa Cruz Metropolitan Transit District\n", + "schedule_gtfs_dataset_key 43d8d305ee692724a532f30ea63a1cbe\n", + "vp_per_min_agency 1.52\n", + "spatial_accuracy_agency 94.49" ] }, - "execution_count": 22, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -694,11 +694,293 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "2038f4d6-1d3e-4331-b08f-aa2812a6b749", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'gs://calitp-analytics-data/data-analyses/rt_vs_schedule/'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "RT_SCHED_GCS" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "cf1aab5e-f375-44a2-bddb-78830f29f762", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'vp_agency/agency_metrics'" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dict_inputs.vp_agency_metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "1fea8b05-c690-49c8-82c9-39c3f2f17e98", + "metadata": {}, + "outputs": [], + "source": [ + "sept_df = pd.read_parquet(\"gs://calitp-analytics-data/data-analyses/rt_vs_schedule/vp_agency/agency_metrics_TEST_2024-09-18.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "1699a6a0-223d-4667-b3b4-b7650387cb7f", + "metadata": {}, + "outputs": [], + "source": [ + "mar_df = pd.read_parquet(\"gs://calitp-analytics-data/data-analyses/rt_vs_schedule/vp_agency/agency_metrics_TEST_2024-03-13.parquet\")" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "085868d8-ee1a-4849-b13a-27c90ac9f8ac", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caltrans_districtorganization_nameschedule_gtfs_dataset_keyvp_per_min_agencyspatial_accuracy_agency
001 - EurekaCity of Eurekaa253a8d7acd57657bb98050f37dd6b0f2.9096.56
101 - EurekaLake Transit Authority0a3c0b21c85fb09f8db91599e14dd7f72.4496.45
\n", + "
" + ], + "text/plain": [ + " caltrans_district organization_name schedule_gtfs_dataset_key \\\n", + "0 01 - Eureka City of Eureka a253a8d7acd57657bb98050f37dd6b0f \n", + "1 01 - Eureka Lake Transit Authority 0a3c0b21c85fb09f8db91599e14dd7f7 \n", + "\n", + " vp_per_min_agency spatial_accuracy_agency \n", + "0 2.90 96.56 \n", + "1 2.44 96.45 " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mar_df.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "ea934f21-c2e4-42dd-be5a-32b740008ba2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'vp_agency/agency_metrics_TEST_'" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f\"{GTFS_DATA_DICT.rt_vs_schedule_tables.vp_agency_metrics}_TEST_\"" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "f4881744-f5dc-4a7a-9203-88ff661741ca", + "metadata": {}, + "outputs": [], + "source": [ + "sort_cols = [\"schedule_gtfs_dataset_key\", \"service_date\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "f76206d8-23b8-462f-8e61-9f839b12eeb7", + "metadata": {}, + "outputs": [], + "source": [ + "def concatenate_agency_level_metrics(\n", + " date_list: list\n", + ") -> pd.DataFrame:\n", + " FILE = f\"{GTFS_DATA_DICT.rt_vs_schedule_tables.vp_agency_metrics}_TEST\"\n", + " \n", + " df = time_series_utils.concatenate_datasets_across_dates(\n", + " RT_SCHED_GCS,\n", + " FILE,\n", + " date_list,\n", + " data_type = \"df\",\n", + " ).sort_values(sort_cols).reset_index(drop=True)\n", + " \n", + " return df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "db75c693-2bd5-4631-9fe9-b0e302b89abf", + "metadata": {}, + "outputs": [], + "source": [ + "analysis_date_list = rt_dates.y2024_dates" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "39f4dcc0-ead6-46e7-8e5a-4eebf5d03544", + "metadata": {}, 
"outputs": [], - "source": [] + "source": [ + "final_df = concatenate_agency_level_metrics(analysis_date_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "5ade9cb8-893b-4e27-8151-9abc31ea60c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caltrans_districtorganization_nameschedule_gtfs_dataset_keyvp_per_min_agencyspatial_accuracy_agencyservice_date
004 - OaklandMarin County Transit District015d67d5b75b5cf2b710bbadadfb75f52.6090.882024-01-17
104 - OaklandMarin County Transit District015d67d5b75b5cf2b710bbadadfb75f52.6890.432024-02-14
\n", + "
" + ], + "text/plain": [ + " caltrans_district organization_name \\\n", + "0 04 - Oakland Marin County Transit District \n", + "1 04 - Oakland Marin County Transit District \n", + "\n", + " schedule_gtfs_dataset_key vp_per_min_agency \\\n", + "0 015d67d5b75b5cf2b710bbadadfb75f5 2.60 \n", + "1 015d67d5b75b5cf2b710bbadadfb75f5 2.68 \n", + "\n", + " spatial_accuracy_agency service_date \n", + "0 90.88 2024-01-17 \n", + "1 90.43 2024-02-14 " + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df.head(2)" + ] } ], "metadata": { diff --git a/rt_scheduled_v_ran/scripts/Makefile b/rt_scheduled_v_ran/scripts/Makefile index be11499d3..d228904ad 100644 --- a/rt_scheduled_v_ran/scripts/Makefile +++ b/rt_scheduled_v_ran/scripts/Makefile @@ -2,7 +2,7 @@ rt_sched_pipeline: # cd rt_segment_speeds && pip install -r requirements.txt && cd ../_shared_utils && make setup_env && cd ../ python rt_v_scheduled_trip.py python rt_v_scheduled_routes.py - + #python rt_v_scheduled_agency.py Amanda commenting this out for now # this can be run after rt_segment_speeds make rt_stop_times_pipeline is run schedule_rt_stop_times_table: diff --git a/rt_scheduled_v_ran/scripts/rt_v_scheduled_agency.py b/rt_scheduled_v_ran/scripts/rt_v_scheduled_agency.py index 702578fe6..0e6a2555e 100644 --- a/rt_scheduled_v_ran/scripts/rt_v_scheduled_agency.py +++ b/rt_scheduled_v_ran/scripts/rt_v_scheduled_agency.py @@ -44,7 +44,7 @@ def agency_metrics(analysis_date: str, dict_inputs: dict) -> pd.DataFrame: # Cleanrt_V agg1 = agg1.drop(columns=sum_cols) - # Save + # Save: take out test later agg1.to_parquet(f"{RT_SCHED_GCS}{AGENCY_EXPORT}_TEST_{analysis_date}.parquet") end = datetime.datetime.now()