From fc86cb8a7cddfaa6628a2e738307c52475aaaa49 Mon Sep 17 00:00:00 2001
From: DavidOry <david.ory@gmail.com>
Date: Fri, 8 Mar 2024 13:51:57 -0500
Subject: [PATCH] Add demand trimming notebook

---
 ...rved-demand-for-congested-assignment.ipynb | 410 ++++++++++++++++++
 1 file changed, 410 insertions(+)
 create mode 100644 notebooks/trim-observed-demand-for-congested-assignment.ipynb

diff --git a/notebooks/trim-observed-demand-for-congested-assignment.ipynb b/notebooks/trim-observed-demand-for-congested-assignment.ipynb
new file mode 100644
index 00000000..291294be
--- /dev/null
+++ b/notebooks/trim-observed-demand-for-congested-assignment.ipynb
@@ -0,0 +1,410 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Trim Observed Demand for Congested Assignment\n",
+    "Emme's congested assignment procedures fail if you assign demand for which there is no path. Because the observed demand is based on survey data, it is likely that a small number of trips observed in the survey cannot be completed in the model network. This notebook uses skims from an extended (uncongested) assignment, which does not fail when there is no path for the demand, to trim the observed demand so that it can be assigned in the congested assignment without error."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import openmatrix as omx\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Remote I/O"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "observed_demand_filename = r\"/Users/wsp/Downloads/observed-demand-year-2015-am-emme-taz-by-path.csv\"\n",
+    "skim_dir = \"../examples/temp_acceptance/skims\"\n",
+    "output_demand_filename = \"../examples/temp_on_board_assign/observed-demand-year-2015-am-emme-taz-by-path-trimmed.csv\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Methods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def make_dataframe_from_omx(input_mtx: omx, core_name: str):\n",
+    "\n",
+    "    a = np.array(input_mtx)\n",
+    "\n",
+    "    df = pd.DataFrame(a)\n",
+    "    df = (\n",
+    "        df.unstack()\n",
+    "        .reset_index()\n",
+    "        .rename(\n",
+    "            columns={\"level_0\": \"origin\", \"level_1\": \"destination\", 0: core_name}\n",
+    "        )\n",
+    "    )\n",
+    "    df[\"origin\"] = df[\"origin\"] + 1\n",
+    "    df[\"destination\"] = df[\"destination\"] + 1\n",
+    "\n",
+    "    return df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def fetch_skim_values(path: str, time_period: str):\n",
+    "    filename = os.path.join(skim_dir, \"trnskm{}_{}.omx\".format(time_period.upper(), path))\n",
+    "    omx_handle = omx.open_file(filename)\n",
+    "\n",
+    "    matrix_list = [\"IVT\", \"WAUX\", \"BOARDS\", \"WACC\", \"WEGR\"]\n",
+    " \n",
+    "    first_matrix = True\n",
+    "    for matrix in matrix_list:\n",
+    "        matrix_name = time_period + \"_\" + path + \"_\" + matrix\n",
+    "        df = make_dataframe_from_omx(omx_handle[matrix_name], matrix_name)\n",
+    "        df.columns = [\"origin\", \"destination\", matrix]\n",
+    "        if first_matrix:\n",
+    "            running_df = df\n",
+    "            first_matrix = False\n",
+    "        else:\n",
+    "            running_df = running_df.merge(df, on=[\"origin\", \"destination\"])\n",
+    "\n",
+    "    omx_handle.close()\n",
+    "    running_df[\"path\"] = path.lower()\n",
+    "    running_df[\"time_period\"] = time_period.lower()\n",
+    "\n",
+    "    return running_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def identify_present_paths(input_df: pd.DataFrame):\n",
+    "    df = input_df.copy()\n",
+    "    df[\"walk_time\"] = df[\"WAUX\"] + df[\"WACC\"] + df[\"WEGR\"] \n",
+    "    df[\"path_found\"] = (df[\"IVT\"] > 0.01) | (df[\"walk_time\"] > 0.01)\n",
+    "    \n",
+    "    return df[[\"origin\", \"destination\", \"path\", \"time_period\", \"path_found\"]].copy()\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def flag_path_in_observed(input_observed_df: pd.DataFrame, input_path: str, input_time_period: str):\n",
+    "    o_df = input_observed_df.copy()\n",
+    "\n",
+    "    df = fetch_skim_values(input_path, input_time_period)\n",
+    "    a_df = identify_present_paths(df)\n",
+    "\n",
+    "    r_df = pd.merge(o_df, a_df, left_on = [\"model_time\", \"path_type\", \"orig_emme_taz\", \"dest_emme_taz\"], right_on=[\"time_period\", \"path\", \"origin\", \"destination\"], how=\"left\").reset_index(drop=True)\n",
+    "    r_df = r_df.drop(columns=[\"origin\", \"destination\", \"path\", \"time_period\"])\n",
+    "    r_df[\"path_found\"] = r_df[\"path_found\"].fillna(False)\n",
+    "    r_df = r_df.rename(columns={\"path_found\": \"path_found_{}\".format(input_path.lower())})\n",
+    "\n",
+    "    return r_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Data Reads"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "observed_df = pd.read_csv(observed_demand_filename) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "path_type\n",
+       "wlk_trn_wlk    21751\n",
+       "pnr_trn_wlk     5981\n",
+       "knr_trn_wlk     4515\n",
+       "wlk_trn_knr      932\n",
+       "wlk_trn_pnr      149\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "observed_df.value_counts(\"path_type\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>model_time</th>\n",
+       "      <th>path_type</th>\n",
+       "      <th>orig_emme_taz</th>\n",
+       "      <th>dest_emme_taz</th>\n",
+       "      <th>trips</th>\n",
+       "      <th>path_found_wlk_trn_wlk</th>\n",
+       "      <th>path_found_pnr_trn_wlk</th>\n",
+       "      <th>path_found_knr_trn_wlk</th>\n",
+       "      <th>path_found_wlk_trn_pnr</th>\n",
+       "      <th>path_found_wlk_trn_knr</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>am</td>\n",
+       "      <td>knr_trn_wlk</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2953</td>\n",
+       "      <td>12.064273</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>am</td>\n",
+       "      <td>knr_trn_wlk</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1055</td>\n",
+       "      <td>5.721250</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>am</td>\n",
+       "      <td>knr_trn_wlk</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2592</td>\n",
+       "      <td>1.040014</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>am</td>\n",
+       "      <td>knr_trn_wlk</td>\n",
+       "      <td>5</td>\n",
+       "      <td>2178</td>\n",
+       "      <td>4.855805</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>am</td>\n",
+       "      <td>knr_trn_wlk</td>\n",
+       "      <td>7</td>\n",
+       "      <td>398</td>\n",
+       "      <td>20.209921</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  model_time    path_type  orig_emme_taz  dest_emme_taz      trips  \\\n",
+       "0         am  knr_trn_wlk              1           2953  12.064273   \n",
+       "1         am  knr_trn_wlk              2           1055   5.721250   \n",
+       "2         am  knr_trn_wlk              3           2592   1.040014   \n",
+       "3         am  knr_trn_wlk              5           2178   4.855805   \n",
+       "4         am  knr_trn_wlk              7            398  20.209921   \n",
+       "\n",
+       "   path_found_wlk_trn_wlk  path_found_pnr_trn_wlk  path_found_knr_trn_wlk  \\\n",
+       "0                   False                   False                    True   \n",
+       "1                   False                   False                    True   \n",
+       "2                   False                   False                    True   \n",
+       "3                   False                   False                    True   \n",
+       "4                   False                   False                    True   \n",
+       "\n",
+       "   path_found_wlk_trn_pnr  path_found_wlk_trn_knr  \n",
+       "0                   False                   False  \n",
+       "1                   False                   False  \n",
+       "2                   False                   False  \n",
+       "3                   False                   False  \n",
+       "4                   False                   False  "
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "running_observed_df = flag_path_in_observed(observed_df, \"WLK_TRN_WLK\", \"AM\")\n",
+    "running_observed_df = flag_path_in_observed(running_observed_df, \"PNR_TRN_WLK\", \"AM\")\n",
+    "running_observed_df = flag_path_in_observed(running_observed_df, \"KNR_TRN_WLK\", \"AM\")\n",
+    "running_observed_df = flag_path_in_observed(running_observed_df, \"WLK_TRN_PNR\", \"AM\")\n",
+    "running_observed_df = flag_path_in_observed(running_observed_df, \"WLK_TRN_KNR\", \"AM\")\n",
+    "running_observed_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "path_present\n",
+       "True     32006\n",
+       "False     1322\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "output_df = running_observed_df.copy() \n",
+    "output_df[\"path_present\"] = output_df[\"path_found_wlk_trn_wlk\"] | output_df[\"path_found_pnr_trn_wlk\"] | output_df[\"path_found_knr_trn_wlk\"] | output_df[\"path_found_wlk_trn_pnr\"] | output_df[\"path_found_wlk_trn_knr\"]\n",
+    "output_df.value_counts(\"path_present\")  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "output_df = output_df[output_df[\"path_present\"] == True].copy().reset_index(drop=True)\n",
+    "output_df = output_df[[\"model_time\", \"path_type\", \"orig_emme_taz\", \"dest_emme_taz\", \"trips\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9690178614896935"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sum(output_df[\"trips\"])/sum(observed_df[\"trips\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "output_df.to_csv(output_demand_filename, index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "tm2py",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}