Skip to content

Commit

Permalink
Merge pull request #842 from cal-itp/csis-land-use
Browse files Browse the repository at this point in the history
CSIS Metric Testing: Land Use + Natural Resources
  • Loading branch information
KatrinaMKaiser authored Aug 30, 2023
2 parents 743f6e8 + 55afb81 commit 4ad5937
Show file tree
Hide file tree
Showing 4 changed files with 4,316 additions and 194 deletions.
3,706 changes: 3,706 additions & 0 deletions project_prioritization/metrics/metrics_testing_land_use.ipynb

Large diffs are not rendered by default.

635 changes: 444 additions & 191 deletions project_prioritization/metrics/metrics_testing_safety.ipynb

Large diffs are not rendered by default.

162 changes: 159 additions & 3 deletions project_prioritization/metrics/read_data_entry.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,27 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "34104da9-6af5-42e4-981c-c74ccf987005",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.9/site-packages/geopandas/_compat.py:124: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.1-CAPI-1.16.0). Conversions between both will be slow.\n",
" warnings.warn(\n",
"/tmp/ipykernel_311/1872485643.py:4: DeprecationWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas still uses PyGEOS by default. However, starting with version 0.14, the default will switch to Shapely. To force to use Shapely 2.0 now, you can either uninstall PyGEOS or set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas:\n",
"\n",
"import os\n",
"os.environ['USE_PYGEOS'] = '0'\n",
"import geopandas\n",
"\n",
"In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).\n",
" import geopandas as gpd\n"
]
}
],
"source": [
"import os\n",
"os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(1_000_000_000_000) ## 1TB?\n",
Expand Down Expand Up @@ -162,10 +179,149 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "598992c9-c36a-4702-802d-aa75366c7410",
"metadata": {},
"outputs": [],
"source": [
"# VMT\n",
"vmt = to_snakecase(pd.read_excel(f'{GCS_FILE_PATH}Metrics_Scoring_All_Projects.xlsx', sheet_name=\"VMT\"))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "8f9e5dfa-d007-478d-99e0-5f3bdcf2ee6a",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_311/65901047.py:1: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
" vmt.columns = vmt.columns.str.replace('?', '')\n"
]
}
],
"source": [
"vmt.columns = vmt.columns.str.replace('?', '')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e51f873a-f82e-4df1-b87f-bb2b59fc6d5e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 80 entries, 0 to 79\n",
"Data columns (total 17 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 submission_log_number 80 non-null int64 \n",
" 1 program 80 non-null object \n",
" 2 project_name 80 non-null object \n",
" 3 data_enterer_name 54 non-null object \n",
" 4 done_y_n 50 non-null object \n",
" 5 notes 53 non-null object \n",
" 6 estimated_change_in_vmt__total_for_project_ 39 non-null object \n",
" 7 is_specific_ 52 non-null object \n",
" 8 project_contains_new_lane_miles_ 50 non-null object \n",
" 9 total_new_lane_miles 45 non-null object \n",
" 10 project_contains_new_interchange_ 51 non-null object \n",
" 11 project_contains_new_transit_riders_ 52 non-null object \n",
" 12 project_contains_active_transportation_element_ 52 non-null object \n",
" 13 score 53 non-null float64\n",
" 14 additional_notes 18 non-null object \n",
" 15 hunter_esimtate_comments 6 non-null object \n",
" 16 henry_estimate_comments 13 non-null object \n",
"dtypes: float64(1), int64(1), object(15)\n",
"memory usage: 10.8+ KB\n"
]
}
],
"source": [
"vmt.info()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "a8f83277-c61e-4d0b-9b59-37b6ba747012",
"metadata": {},
"outputs": [],
"source": [
"vmt['estimated_change_in_vmt__total_for_project_'] = vmt['estimated_change_in_vmt__total_for_project_'].str.replace('\\n', '')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "6635b7d9-c04a-4d57-9aa8-be8fb1ef05b1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 14\n",
"N 14\n",
"? 3\n",
"3.5 1\n",
"7.8 1\n",
"16 1\n",
"17.2 1\n",
"10.4 1\n",
"7.5 1\n",
"32 1\n",
"30.4 1\n",
"4.2 1\n",
"1.3 1\n",
"22.2 1\n",
"14 1\n",
"6.7 1\n",
"4.4 1\n",
"Name: total_new_lane_miles, dtype: int64"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vmt.total_new_lane_miles.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "d4eaa6d4-1895-40ff-99e1-8e5d4d4ceaf9",
"metadata": {},
"outputs": [],
"source": [
"vmt = vmt.astype({'project_contains_new_lane_miles_':'str', 'total_new_lane_miles':'str'})"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "6dffb368-6ac8-4d16-8355-221aea3b0381",
"metadata": {},
"outputs": [],
"source": [
"vmt.to_parquet(f'{GCS_FILE_PATH}data_entry_raw_vmt.parquet')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53334538-9502-4ed3-92f3-0ad5b00ac993",
"metadata": {},
"outputs": [],
"source": []
}
],
Expand Down
7 changes: 7 additions & 0 deletions project_prioritization/metrics/read_data_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,12 @@

# Land use
# Read the "Land Use" sheet of the scoring workbook, pass it through
# to_snakecase (defined outside this chunk), and save a raw parquet copy.
land_use = to_snakecase(pd.read_excel(f'{GCS_FILE_PATH}Metrics_Scoring_All_Projects.xlsx', sheet_name="Land Use"))
# regex=False: '?' is a regex metacharacter, so strip it as a literal character
# instead of relying on the version-dependent default of `regex`.
land_use.columns = land_use.columns.str.replace('?', '', regex=False)
land_use.to_parquet(f'{GCS_FILE_PATH}data_entry_raw_land_use.parquet')

# VMT
# Same workflow for the "VMT" sheet, plus cleanup of free-text columns.
vmt = to_snakecase(pd.read_excel(f'{GCS_FILE_PATH}Metrics_Scoring_All_Projects.xlsx', sheet_name="VMT"))
vmt.columns = vmt.columns.str.replace('?', '', regex=False)
# Drop embedded newlines from the VMT estimate text.
# NOTE(review): the .str accessor yields NaN for non-string cells; confirm this
# column holds only text, otherwise numeric entries are lost here.
vmt['estimated_change_in_vmt__total_for_project_'] = vmt['estimated_change_in_vmt__total_for_project_'].str.replace('\n', '', regex=False)
# Mirror the companion notebook (read_data_entry.ipynb), which casts these two
# columns to str before writing: total_new_lane_miles appears to mix numbers
# with markers such as 'N' and '?', and an object column with mixed Python types
# cannot be written to parquet.
# Note that astype(str) also turns missing values into the string 'nan'.
vmt = vmt.astype({'project_contains_new_lane_miles_': 'str', 'total_new_lane_miles': 'str'})
vmt.to_parquet(f'{GCS_FILE_PATH}data_entry_raw_vmt.parquet')

0 comments on commit 4ad5937

Please sign in to comment.