diff --git a/project_prioritization/metrics/metrics_testing_land_use.ipynb b/project_prioritization/metrics/metrics_testing_land_use.ipynb new file mode 100644 index 000000000..fbb4f61c1 --- /dev/null +++ b/project_prioritization/metrics/metrics_testing_land_use.ipynb @@ -0,0 +1,3706 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "0a4d9418-3035-4511-bdde-8def6baa892d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/geopandas/_compat.py:124: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.1-CAPI-1.16.0). Conversions between both will be slow.\n", + " warnings.warn(\n", + "/tmp/ipykernel_372/1249887178.py:3: DeprecationWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas still uses PyGEOS by default. However, starting with version 0.14, the default will switch to Shapely. To force to use Shapely 2.0 now, you can either uninstall PyGEOS or set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas:\n", + "\n", + "import os\n", + "os.environ['USE_PYGEOS'] = '0'\n", + "import geopandas\n", + "\n", + "In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. 
However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).\n", + " import geopandas as gpd\n" + ] + } + ], + "source": [ + "# imports and notebook-wide configuration\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "from siuba import *\n", + "import numpy as np\n", + "from shared_utils import utils, geography_utils\n", + "pd.options.display.max_columns = 100\n", + "import gcsfs\n", + "from calitp_data_analysis.sql import to_snakecase\n", + "from calitp_data_analysis import get_fs\n", + "fs = get_fs()\n", + "\n", + "GCS_FILE_PATH = \"gs://calitp-analytics-data/data-analyses/project_prioritization/\"" + ] + }, + { + "cell_type": "markdown", + "id": "aa94981f-da47-40e2-8786-6bb717bccf57", + "metadata": {}, + "source": [ + "# CSIS Data Entry Scoring - Land Use\n", + "\n", + "## Part 1: Evaluate Urban/Rural" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "67749d7c-fc53-49c1-9d6c-b9be1fcd740a", + "metadata": {}, + "outputs": [], + "source": [ + "# read in project locations\n", + "project_geo = gpd.read_parquet(f'{GCS_FILE_PATH}Survey123_Geo/cleaned_survey123_sample13.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4a133894-fbac-4c72-8d8d-71e4686846ed", + "metadata": {}, + "outputs": [], + "source": [ + "# reproject to the CA NAD83 Albers CRS so later distance operations (e.g. buffering) use its units\n", + "project_geo = project_geo.to_crs(geography_utils.CA_NAD83Albers)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "dbf51379-aaf2-4c7d-899b-bf59222efb7f", + "metadata": {}, + "outputs": [], + "source": [ + "# buffer each project geometry by 30 CRS units (~100 ft, assuming the CRS set above is in meters)\n", + "project_geo['b100'] = project_geo.buffer(30)\n", + "project_geo = project_geo.set_geometry('b100')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cb2ca859-4bd3-4289-909f-2384b1eeee32", + "metadata": {}, + "outputs": [], + 
"source": [ + "# dissolve by project\n", + "project_geo_dissolve = project_geo.dissolve('projname').reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "48da0f62-3494-46e6-8b96-d3c4e1fe32a0", + "metadata": {}, + "outputs": [], + "source": [ + "# load in combined urbanized areas (PRC 21071, PRC 21094.5, Census-based)\n", + "with get_fs().open(f'{GCS_FILE_PATH}combined_urbanized_areas.geojson') as f:\n", + " urbanized_areas = gpd.read_file(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8ad796ed-31b5-48f2-8c2f-893a54865e92", + "metadata": {}, + "outputs": [], + "source": [ + "# we only need 1 observation, dissolve\n", + "urbanized_areas = urbanized_areas.dissolve()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1b6b4860-188c-43ff-9e48-35fa4581ad67", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "