From a15c5452c86bf32cfbd9d49c031408d9930e7c8c Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Wed, 18 Sep 2024 00:57:02 +0000 Subject: [PATCH] cleanup, add web map! --- gtfs_schedule/09_bus_stops_in_ca.ipynb | 348 ++++++++++++++++--------- 1 file changed, 227 insertions(+), 121 deletions(-) diff --git a/gtfs_schedule/09_bus_stops_in_ca.ipynb b/gtfs_schedule/09_bus_stops_in_ca.ipynb index c3f29f41b..f62446eff 100644 --- a/gtfs_schedule/09_bus_stops_in_ca.ipynb +++ b/gtfs_schedule/09_bus_stops_in_ca.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 39, "id": "dcac5d5c-11b1-4782-aad1-228d40709df7", "metadata": {}, "outputs": [], @@ -29,7 +29,7 @@ "from siuba import *\n", "\n", "from segment_speed_utils import helpers\n", - "from shared_utils import catalog_utils, rt_dates, gtfs_utils_v2\n", + "from shared_utils import catalog_utils, rt_dates, gtfs_utils_v2, rt_utils\n", "\n", "SCHED_GCS = \"gs://calitp-analytics-data/data-analyses/gtfs_schedule/\"" ] @@ -174,7 +174,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "d24fb928-23b7-424b-8941-d1378bb2fff1", + "id": "aa3d7515-2b4a-4113-bbe0-990109a6270e", "metadata": {}, "outputs": [ { @@ -186,7 +186,7 @@ } ], "source": [ - "aug_stops_for_export = process_for_export(sjoin_shs(ca_stops))" + "aug_shs_joined = sjoin_shs(ca_stops)" ] }, { @@ -196,7 +196,7 @@ "metadata": {}, "outputs": [], "source": [ - "# stops_on_shn.explore()" + "# aug_shs_joined.explore()" ] }, { @@ -269,8 +269,8 @@ " base64_url\n", " gtfs_dataset_key\n", " name\n", - " regional_feed_type\n", " type\n", + " regional_feed_type\n", " \n", " \n", " \n", @@ -283,8 +283,8 @@ " aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm...\n", " 239f3baf3dd3b9e9464f66a777f9897d\n", " SBMTD Schedule\n", - " None\n", " schedule\n", + " None\n", " \n", " \n", "\n", @@ -300,11 +300,11 @@ " base64_url \\\n", "40 aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm... \n", "\n", - " gtfs_dataset_key name regional_feed_type \\\n", - "40 239f3baf3dd3b9e9464f66a777f9897d SBMTD Schedule None \n", + " gtfs_dataset_key name type \\\n", + "40 239f3baf3dd3b9e9464f66a777f9897d SBMTD Schedule schedule \n", "\n", - " type \n", - "40 schedule " + " regional_feed_type \n", + "40 None " ] }, "execution_count": 14, @@ -471,8 +471,8 @@ " base64_url\n", " gtfs_dataset_key\n", " name\n", - " regional_feed_type\n", " type\n", + " regional_feed_type\n", " \n", " \n", " \n", @@ -485,8 +485,8 @@ " aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm...\n", " 239f3baf3dd3b9e9464f66a777f9897d\n", " SBMTD Schedule\n", - " None\n", " schedule\n", + " None\n", " \n", " \n", "\n", @@ -502,11 +502,11 @@ " base64_url \\\n", "40 aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm... \n", "\n", - " gtfs_dataset_key name regional_feed_type \\\n", - "40 239f3baf3dd3b9e9464f66a777f9897d SBMTD Schedule None \n", + " gtfs_dataset_key name type \\\n", + "40 239f3baf3dd3b9e9464f66a777f9897d SBMTD Schedule schedule \n", "\n", - " type \n", - "40 schedule " + " regional_feed_type \n", + "40 None " ] }, "execution_count": 18, @@ -623,51 +623,51 @@ " \n", " \n", " 0\n", - " 2402300a33d09fc032760d297284eead\n", + " 0f0642bc7d085c2fa77231b43d039063\n", " 2024-08-21\n", " dc50d111693842f1f2e8067cb0ad11e1\n", - " 46\n", - " America/Los_Angeles\n", " 1\n", - " 2024-08-21 09:02:00\n", - " 2024-08-21 09:02:00\n", + " America/Los_Angeles\n", + " 110\n", + " 2024-08-21 06:10:00\n", + " 2024-08-21 22:15:00\n", " 2024-08-06 03:00:22.899627+00:00\n", " None\n", " ...\n", - " ccbdb033135b5b8540250406d0520282\n", + " 29bad4f8e88af65c6df32ed091beb395\n", " None\n", " None\n", - " 136\n", - " Cathedral Oaks & Alpha Resource\n", + " 1\n", + " Modoc & Portesuello\n", " None\n", " None\n", " America/Los_Angeles\n", " 2\n", - " POINT (-119.77610 34.45135)\n", + " POINT (-119.72607 34.42486)\n", " \n", " \n", " 1\n", - " 2c586cda06751c44f0a815f4e8c632b9\n", + " 0fc73115d68b2a26deb739df6bd42b15\n", " 2024-08-21\n", " dc50d111693842f1f2e8067cb0ad11e1\n", - " 51\n", + " 81\n", " America/Los_Angeles\n", - " 1\n", - " 2024-08-21 15:12:03\n", - " 2024-08-21 15:12:03\n", + " 59\n", + " 2024-08-21 06:14:20\n", + " 2024-08-21 21:44:20\n", " 2024-08-06 03:00:22.899627+00:00\n", " None\n", " ...\n", - " 28d3ba5e59836c8536174f93bb72fbf7\n", + " bb36d3a925d25cb0ac9570ef5f1e792e\n", " None\n", " None\n", - " 153\n", - " Cota & Olive\n", + " 2\n", + " San Andres & Valerio\n", " None\n", " None\n", " America/Los_Angeles\n", " 2\n", - " POINT (-119.69137 34.42294)\n", + " POINT (-119.71894 34.42071)\n", " \n", " \n", "\n", @@ -676,36 +676,32 @@ ], "text/plain": [ " key service_date \\\n", - "0 2402300a33d09fc032760d297284eead 2024-08-21 \n", - "1 2c586cda06751c44f0a815f4e8c632b9 2024-08-21 \n", + "0 0f0642bc7d085c2fa77231b43d039063 2024-08-21 \n", + "1 0fc73115d68b2a26deb739df6bd42b15 2024-08-21 \n", "\n", " feed_key stop_id feed_timezone \\\n", - "0 dc50d111693842f1f2e8067cb0ad11e1 46 America/Los_Angeles \n", - "1 dc50d111693842f1f2e8067cb0ad11e1 51 America/Los_Angeles \n", + "0 dc50d111693842f1f2e8067cb0ad11e1 1 America/Los_Angeles \n", + "1 dc50d111693842f1f2e8067cb0ad11e1 81 America/Los_Angeles \n", "\n", " stop_event_count first_stop_arrival_datetime_pacific \\\n", - "0 1 2024-08-21 09:02:00 \n", - "1 1 2024-08-21 15:12:03 \n", + "0 110 2024-08-21 06:10:00 \n", + "1 59 2024-08-21 06:14:20 \n", "\n", " last_stop_departure_datetime_pacific _feed_valid_from \\\n", - "0 2024-08-21 09:02:00 2024-08-06 03:00:22.899627+00:00 \n", - "1 2024-08-21 15:12:03 2024-08-06 03:00:22.899627+00:00 \n", + "0 2024-08-21 22:15:00 2024-08-06 03:00:22.899627+00:00 \n", + "1 2024-08-21 21:44:20 2024-08-06 03:00:22.899627+00:00 \n", "\n", " route_type_0 ... stop_key tts_stop_name \\\n", - "0 None ... ccbdb033135b5b8540250406d0520282 None \n", - "1 None ... 28d3ba5e59836c8536174f93bb72fbf7 None \n", + "0 None ... 29bad4f8e88af65c6df32ed091beb395 None \n", + "1 None ... bb36d3a925d25cb0ac9570ef5f1e792e None \n", "\n", - " parent_station stop_code stop_name stop_desc \\\n", - "0 None 136 Cathedral Oaks & Alpha Resource None \n", - "1 None 153 Cota & Olive None \n", + " parent_station stop_code stop_name stop_desc location_type \\\n", + "0 None 1 Modoc & Portesuello None None \n", + "1 None 2 San Andres & Valerio None None \n", "\n", - " location_type stop_timezone_coalesced wheelchair_boarding \\\n", - "0 None America/Los_Angeles 2 \n", - "1 None America/Los_Angeles 2 \n", - "\n", - " geometry \n", - "0 POINT (-119.77610 34.45135) \n", - "1 POINT (-119.69137 34.42294) \n", + " stop_timezone_coalesced wheelchair_boarding geometry \n", + "0 America/Los_Angeles 2 POINT (-119.72607 34.42486) \n", + "1 America/Los_Angeles 2 POINT (-119.71894 34.42071) \n", "\n", "[2 rows x 33 columns]" ] @@ -865,7 +861,7 @@ } ], "source": [ - "additional_shs_stops = sjoin_shs(stops_to_add)" + "additional_shs_joined = sjoin_shs(stops_to_add)" ] }, { @@ -875,147 +871,257 @@ "metadata": {}, "outputs": [], "source": [ - "# additional_shs_stops.explore()" + "# additional_shs_joined.explore()" + ] + }, + { + "cell_type": "markdown", + "id": "3b46f7b6-fa94-4625-9f7a-5d43cf880a26", + "metadata": {}, + "source": [ + "# new combined export" ] }, { "cell_type": "code", "execution_count": 35, - "id": "b46b856e-cfd0-42a6-97ff-12dec11a2c57", + "id": "c3381bce-83c4-40e7-930d-000763302eea", "metadata": {}, "outputs": [], "source": [ - "analysis_date = rt_dates.DATES['jul2024']" + "all_spatial = pd.concat([aug_shs_joined, additional_shs_joined])" ] }, { "cell_type": "code", - "execution_count": 36, - "id": "54b5d3f0-9aa4-492a-972a-e583a75a71cb", + "execution_count": 37, + "id": "f0a4544b-df05-4107-a2e5-63eb4ce8bf2f", "metadata": {}, "outputs": [], "source": [ - "import sys\n", - "sys.path.append('../gtfs_funnel/')" + "stops_for_export = process_for_export(all_spatial)" ] }, { "cell_type": "code", - "execution_count": 37, - "id": "4fb0ff61-7542-48cd-89a4-a44ee97d86ba", + "execution_count": 38, + "id": "ad05cbdb-ef63-4beb-8aac-fba9df5a7e22", "metadata": {}, "outputs": [], "source": [ - "import update_vars # no update_vars here so won't clash " + "stops_for_export.to_csv(\"ca_stops_revised.csv\", index=False)" + ] + }, + { + "cell_type": "markdown", + "id": "17cf4efa-f233-4544-9865-aeb03eb30df7", + "metadata": {}, + "source": [ + "## a map?\n", + "\n", + "using webapp" ] }, { "cell_type": "code", - "execution_count": 38, - "id": "e1833220-40ec-4abb-92d1-b0c6631d3fee", + "execution_count": 40, + "id": "92db6d36-c7ea-41ff-9a6b-ae290fd46634", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'gs://calitp-analytics-data/data-analyses/rt_delay/compiled_cached_views/'" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "stops_gcs = update_vars.COMPILED_CACHED_VIEWS\n", - "stops_gcs" + "shn = gpd.read_parquet(rt_utils.SHN_PATH)[['Route', 'County', 'District',\n", + " 'RouteType', 'geometry']]" ] }, { "cell_type": "code", - "execution_count": 39, - "id": "7bf69575-8f15-411d-b7d1-3d6ddaa4f963", + "execution_count": 52, + "id": "e4d5776e-4c1e-48b9-b8a0-9a49b7f16841", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'stops'" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "update_vars.GTFS_DATA_DICT.schedule_downloads.stops # TIL you can . access a dict" + "to_map = all_spatial.drop(columns=['base64_url'])\n", + "# to_map['color'] = (10, 29, 245)" ] }, { "cell_type": "code", - "execution_count": 40, - "id": "fd76e0dd-d4c0-4e1f-a380-5eabe8027115", + "execution_count": 53, + "id": "f4c8fc45-0ad8-4819-a3ed-662940186030", "metadata": {}, "outputs": [], "source": [ - "july_stops = gpd.read_parquet(f'{stops_gcs}stops_{analysis_date}.parquet')" + "import calitp_data_analysis" ] }, { "cell_type": "code", - "execution_count": 41, - "id": "dcc2eb23-ab88-441c-bb98-5709d7b62f65", + "execution_count": 56, + "id": "8e28af68-1186-4eae-b78f-4ad8597b1575", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Index(['feed_key', 'service_date', 'feed_timezone',\n", - " 'first_stop_arrival_datetime_pacific',\n", - " 'last_stop_departure_datetime_pacific', 'stop_id', 'stop_key',\n", - " 'stop_name', 'stop_event_count', 'route_type_0', 'route_type_1',\n", - " 'route_type_2', 'route_type_3', 'route_type_4', 'route_type_5',\n", - " 'route_type_6', 'route_type_7', 'route_type_11', 'route_type_12',\n", - " 'missing_route_type', 'geometry'],\n", - " dtype='object')" + "120" ] }, - "execution_count": 41, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "july_stops.columns" + "len(to_map.agency.unique())" ] }, { - "cell_type": "markdown", - "id": "3b46f7b6-fa94-4625-9f7a-5d43cf880a26", + "cell_type": "code", + "execution_count": 63, + "id": "fa6c7df5-40a5-4324-97b7-056aa5d0af65", "metadata": {}, + "outputs": [], "source": [ - "# new combined export" + "#calitp_data_analysis.calitp_color_palette # doesn't work?" ] }, { "cell_type": "code", - "execution_count": 42, - "id": "f0a4544b-df05-4107-a2e5-63eb4ce8bf2f", + "execution_count": 64, + "id": "eacdcc13-4246-4cb0-8262-94864f717607", "metadata": {}, "outputs": [], "source": [ - "stops_for_export = pd.concat([aug_stops_for_export, process_for_export(additional_shs_stops)])" + "CALITP_CATEGORY_BOLD_COLORS = [\n", + " \"#136C97\", # darker blue\n", + " \"#E16B26\", # orange\n", + " \"#F6BF16\", # yellow\n", + " \"#00896B\", # green\n", + " \"#7790A3\", # lighter blue\n", + " \"#5B559C\", # purple\n", + "]" ] }, { "cell_type": "code", - "execution_count": 43, - "id": "ad05cbdb-ef63-4beb-8aac-fba9df5a7e22", + "execution_count": 66, + "id": "6f0cd2cb-6a50-47f8-951a-5f193c3350c0", "metadata": {}, "outputs": [], "source": [ - "stops_for_export.to_csv(\"ca_stops_revised.csv\", index=False)" + "full_categories = CALITP_CATEGORY_BOLD_COLORS * 20" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "6b89c935-aeaa-4710-bb00-f0e0b9875d20", + "metadata": {}, + "outputs": [], + "source": [ + "color_dict = dict(zip(to_map.agency.unique(), full_categories))" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "d2fe6b19-ee91-4350-8ef8-519fe9fa63df", + "metadata": {}, + "outputs": [], + "source": [ + "# https://www.30secondsofcode.org/python/s/hex-to-rgb/\n", + "def hex_to_rgb(hex):\n", + " return tuple(int(hex[i:i+2], 16) for i in (0, 2, 4))" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "0ba9270e-5fce-4cab-8d3d-e909d1d321f0", + "metadata": {}, + "outputs": [], + "source": [ + "to_map['color'] = to_map.agency.apply(lambda x: hex_to_rgb(color_dict[x][1:]))" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "a0c5aa1e-feb7-4492-8698-8826f55e04eb", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:892: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n", + "\n", + " centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "writing to calitp-map-tiles/shs_stops/shs.geojson.gz\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:892: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n", + "\n", + " centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "writing to calitp-map-tiles/shs_stops/stops4.geojson.gz\n" + ] + } + ], + "source": [ + "export_result = rt_utils.set_state_export(shn, subfolder = 'shs_stops/', filename = 'shs',\n", + " map_type = 'state_highway_network')\n", + "spa_map_state = export_result['state_dict']\n", + "\n", + "combined_state = rt_utils.set_state_export(\n", + " to_map, subfolder = 'shs_stops/', filename=f'stops4',\n", + " existing_state=spa_map_state, map_title=f'SHS with Stops Sep 2024')" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "9cc52ae4-8d39-44f5-9d95-799cfa64515a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'state_dict': {'name': 'null',\n", + " 'layers': [{'name': 'Map',\n", + " 'url': 'https://storage.googleapis.com/calitp-map-tiles/shs_stops/shs.geojson.gz',\n", + " 'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5},\n", + " 'type': 'state_highway_network'},\n", + " {'name': 'SHS with Stops Sep 2024',\n", + " 'url': 'https://storage.googleapis.com/calitp-map-tiles/shs_stops/stops4.geojson.gz',\n", + " 'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5}}],\n", + " 'lat_lon': (36.186293217006714, -120.09747480157621),\n", + " 'zoom': 13},\n", + " 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJNYXAiLCAidXJsIjogImh0dHBzOi8vc3RvcmFnZS5nb29nbGVhcGlzLmNvbS9jYWxpdHAtbWFwLXRpbGVzL3Noc19zdG9wcy9zaHMuZ2VvanNvbi5neiIsICJwcm9wZXJ0aWVzIjogeyJzdHJva2VkIjogZmFsc2UsICJoaWdobGlnaHRfc2F0dXJhdGlvbl9tdWx0aXBsaWVyIjogMC41fSwgInR5cGUiOiAic3RhdGVfaGlnaHdheV9uZXR3b3JrIn0sIHsibmFtZSI6ICJTSFMgd2l0aCBTdG9wcyBTZXAgMjAyNCIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvc2hzX3N0b3BzL3N0b3BzNC5nZW9qc29uLmd6IiwgInByb3BlcnRpZXMiOiB7InN0cm9rZWQiOiBmYWxzZSwgImhpZ2hsaWdodF9zYXR1cmF0aW9uX211bHRpcGxpZXIiOiAwLjV9fV0sICJsYXRfbG9uIjogWzM2LjE4NjI5MzIxNzAwNjcxNCwgLTEyMC4wOTc0NzQ4MDE1NzYyMV0sICJ6b29tIjogMTN9'}" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined_state" ] } ],