Merge pull request #842 from cal-itp/csis-land-use

CSIS Metric Testing: Land Use + Natural Resources
cal-itp · Aug 30, 2023 · 4ad5937 · 4ad5937
2 parents 743f6e8 + 55afb81
commit 4ad5937
Show file tree

Hide file tree

Showing 4 changed files with 4,316 additions and 194 deletions.
diff --git a/project_prioritization/metrics/metrics_testing_land_use.ipynb b/project_prioritization/metrics/metrics_testing_land_use.ipynb
diff --git a/project_prioritization/metrics/metrics_testing_safety.ipynb b/project_prioritization/metrics/metrics_testing_safety.ipynb
diff --git a/project_prioritization/metrics/read_data_entry.ipynb b/project_prioritization/metrics/read_data_entry.ipynb
@@ -2,10 +2,27 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "34104da9-6af5-42e4-981c-c74ccf987005",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.9/site-packages/geopandas/_compat.py:124: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.1-CAPI-1.16.0). Conversions between both will be slow.\n",
+      "  warnings.warn(\n",
+      "/tmp/ipykernel_311/1872485643.py:4: DeprecationWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas still uses PyGEOS by default. However, starting with version 0.14, the default will switch to Shapely. To force to use Shapely 2.0 now, you can either uninstall PyGEOS or set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas:\n",
+      "\n",
+      "import os\n",
+      "os.environ['USE_PYGEOS'] = '0'\n",
+      "import geopandas\n",
+      "\n",
+      "In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).\n",
+      "  import geopandas as gpd\n"
+     ]
+    }
+   ],
    "source": [
     "import os\n",
     "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(1_000_000_000_000) ## 1TB?\n",
@@ -162,10 +179,149 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "598992c9-c36a-4702-802d-aa75366c7410",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "# VMT\n",
+    "vmt = to_snakecase(pd.read_excel(f'{GCS_FILE_PATH}Metrics_Scoring_All_Projects.xlsx', sheet_name=\"VMT\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "8f9e5dfa-d007-478d-99e0-5f3bdcf2ee6a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_311/65901047.py:1: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+      "  vmt.columns = vmt.columns.str.replace('?', '')\n"
+     ]
+    }
+   ],
+   "source": [
+    "vmt.columns = vmt.columns.str.replace('?', '')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "e51f873a-f82e-4df1-b87f-bb2b59fc6d5e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 80 entries, 0 to 79\n",
+      "Data columns (total 17 columns):\n",
+      " #   Column                                           Non-Null Count  Dtype  \n",
+      "---  ------                                           --------------  -----  \n",
+      " 0   submission_log_number                            80 non-null     int64  \n",
+      " 1   program                                          80 non-null     object \n",
+      " 2   project_name                                     80 non-null     object \n",
+      " 3   data_enterer_name                                54 non-null     object \n",
+      " 4   done_y_n                                         50 non-null     object \n",
+      " 5   notes                                            53 non-null     object \n",
+      " 6   estimated_change_in_vmt__total_for_project_      39 non-null     object \n",
+      " 7   is_specific_                                     52 non-null     object \n",
+      " 8   project_contains_new_lane_miles_                 50 non-null     object \n",
+      " 9   total_new_lane_miles                             45 non-null     object \n",
+      " 10  project_contains_new_interchange_                51 non-null     object \n",
+      " 11  project_contains_new_transit_riders_             52 non-null     object \n",
+      " 12  project_contains_active_transportation_element_  52 non-null     object \n",
+      " 13  score                                            53 non-null     float64\n",
+      " 14  additional_notes                                 18 non-null     object \n",
+      " 15  hunter_esimtate_comments                         6 non-null      object \n",
+      " 16  henry_estimate_comments                          13 non-null     object \n",
+      "dtypes: float64(1), int64(1), object(15)\n",
+      "memory usage: 10.8+ KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "vmt.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "a8f83277-c61e-4d0b-9b59-37b6ba747012",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vmt['estimated_change_in_vmt__total_for_project_'] = vmt['estimated_change_in_vmt__total_for_project_'].str.replace('\\n', '')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "6635b7d9-c04a-4d57-9aa8-be8fb1ef05b1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0       14\n",
+       "N       14\n",
+       "?        3\n",
+       "3.5      1\n",
+       "7.8      1\n",
+       "16       1\n",
+       "17.2     1\n",
+       "10.4     1\n",
+       "7.5      1\n",
+       "32       1\n",
+       "30.4     1\n",
+       "4.2      1\n",
+       "1.3      1\n",
+       "22.2     1\n",
+       "14       1\n",
+       "6.7      1\n",
+       "4.4      1\n",
+       "Name: total_new_lane_miles, dtype: int64"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "vmt.total_new_lane_miles.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "d4eaa6d4-1895-40ff-99e1-8e5d4d4ceaf9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vmt = vmt.astype({'project_contains_new_lane_miles_':'str', 'total_new_lane_miles':'str'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "6dffb368-6ac8-4d16-8355-221aea3b0381",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vmt.to_parquet(f'{GCS_FILE_PATH}data_entry_raw_vmt.parquet')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "53334538-9502-4ed3-92f3-0ad5b00ac993",
+   "metadata": {},
+   "outputs": [],
    "source": []
   }
  ],

diff --git a/project_prioritization/metrics/read_data_entry.py b/project_prioritization/metrics/read_data_entry.py
@@ -24,5 +24,12 @@
 
 # Land use
 land_use = to_snakecase(pd.read_excel(f'{GCS_FILE_PATH}Metrics_Scoring_All_Projects.xlsx', sheet_name="Land Use"))
+land_use.columns = land_use.columns.str.replace('?', '')
 land_use.to_parquet(f'{GCS_FILE_PATH}data_entry_raw_land_use.parquet')
 
+# VMT
+vmt = to_snakecase(pd.read_excel(f'{GCS_FILE_PATH}Metrics_Scoring_All_Projects.xlsx', sheet_name="VMT"))
+vmt.columns = vmt.columns.str.replace('?', '')
+vmt['estimated_change_in_vmt__total_for_project_'] = vmt['estimated_change_in_vmt__total_for_project_'].str.replace('\n', '')
+vmt.to_parquet(f'{GCS_FILE_PATH}data_entry_raw_vmt.parquet')
+