diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index cf3a889..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.gitignore b/.gitignore index 6a0eab5..dfc757b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ + +# MacOS file system +*.DS_Store + # Custom excludes viz_scripts/conf plots diff --git a/viz_scripts/.DS_Store b/viz_scripts/.DS_Store index 147fc0a..3784b98 100644 Binary files a/viz_scripts/.DS_Store and b/viz_scripts/.DS_Store differ diff --git a/viz_scripts/auxiliary_files/cost.csv b/viz_scripts/auxiliary_files/cost.csv new file mode 100644 index 0000000..d451f80 --- /dev/null +++ b/viz_scripts/auxiliary_files/cost.csv @@ -0,0 +1,15 @@ +mode,C($/PMT),($)/trip +"Car, drove alone",0.55,0 +"Car, with others",0.275,0 +Taxi/Uber/Lyft,2.5,0 +Bus,0.855,0 +Free Shuttle,0,0 +Train,0.855,0 +Scooter share,0.15,1 +Pilot ebike,0,0 +Bikeshare,0.09,0 +Walk,0,0 +Skate board,0,0 +Regular Bike,0,0 +Not a Trip,0,0 +No Travel,0,0 \ No newline at end of file diff --git a/viz_scripts/cost_and_time_impact_estimates.ipynb b/viz_scripts/cost_and_time_impact_estimates.ipynb new file mode 100644 index 0000000..1bb6e7b --- /dev/null +++ b/viz_scripts/cost_and_time_impact_estimates.ipynb @@ -0,0 +1,572 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2518a96d", + "metadata": {}, + "source": [ + "# Based on Issue #31: Add cost and time estimates as well\n", + "We want to add simple cost and time estimates to assess the broader impacts of programs.\n", + "We will do so by creating simple distance-based maps for each metric - e.g.\n", + "\n", + "cost_per_mile = { \"drove_alone\": ..., \"shared_ride\": ...., \"pilot_ebike\": ....\n", + "}\n", + "\n", + "We can then compute the overall impact of the metric by pseudo code similar to:\n", + "\n", + "for trip in trips:\n", + " cost_impact_trip = (cost_per_mile[“ebike”] – cost_per_mile[trip.replaced_mode]) * trip.length_in_miles\n", + " cost_impact_trips.append(cost_impact_trip)\n", + "\n", + 
"cost_impact_overall = sum(cost_impact_trips)\n", + "\n", + "Of course, we could also use pandas if that works better - e.g. something like:\n", + "\n", + "cost_impact_trips = trips.apply(lambda trip_row: (cost_per_mile[\"ebike\"] - cost_per_mile[trip_row.replaced_mode]) * trip_row.length__in_miles\n", + "cost_impact_overall = cost_impact_trips.sum()\n" + ] + }, + { + "cell_type": "markdown", + "id": "0a308acb", + "metadata": {}, + "source": [ + "Shankari K. suggested following the process outlined in the energy_calculations notebook reproduced below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9624fe3", + "metadata": {}, + "outputs": [], + "source": [ + "# user defined modules\n", + "import scaffolding\n", + "from plots import *\n", + "\n", + "# external packages\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "from collections import defaultdict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29424542", + "metadata": {}, + "outputs": [], + "source": [ + "# global configurations\n", + "sns.set_style('whitegrid')\n", + "sns.set()\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e55a1a1f", + "metadata": {}, + "outputs": [], + "source": [ + "# external variables (run mapping_dictionaries notebook before running this cell)\n", + "%store -r df_EI \n", + "%store -r df_CT\n", + "\n", + "%store -r dic_re\n", + "%store -r dic_pur\n", + "%store -r dic_fuel\n", + "\n", + "# convert a dictionary to a defaultdict\n", + "dic_pur = defaultdict(lambda: 'Other',dic_pur)\n", + "dic_re = defaultdict(lambda: 'Other',dic_re)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef7dd45c", + "metadata": {}, + "outputs": [], + "source": [ + "# Scaffolding Inputs (None -> get all data)\n", + "year = None\n", + "month = None\n", + "program = None" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "9beff67f", + "metadata": {}, + "outputs": [], + "source": [ + "# Define time series for year and month\n", + "tq = scaffolding.get_time_query(year, month)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a7cdcde", + "metadata": {}, + "outputs": [], + "source": [ + "# Acquire rows from database corresponding to time series and program label\n", + "participant_ct_df = scaffolding.load_all_participant_trips(program, tq)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c50baf4a", + "metadata": {}, + "outputs": [], + "source": [ + "# Remove any labelled trips from the dataframe\n", + "labeled_ct = scaffolding.filter_labeled_trips(participant_ct_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2dd6e2a", + "metadata": {}, + "outputs": [], + "source": [ + "# Just expand the user_input feature to multiple features for each entry\n", + "expanded_ct = scaffolding.expand_userinputs(labeled_ct)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f948dc57", + "metadata": {}, + "outputs": [], + "source": [ + "# Removes some rows that don't show a change from another mode to pilot e-bike + name same_mode as confirmed_mode\n", + "expanded_ct = scaffolding.data_quality_check(expanded_ct)\n", + "expanded_ct.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9495e947", + "metadata": {}, + "outputs": [], + "source": [ + "## Mapping new labels with dictionaries\n", + "expanded_ct['Trip_purpose']= expanded_ct['purpose_confirm'].map(dic_pur)\n", + "expanded_ct['Mode_confirm']= expanded_ct['mode_confirm'].map(dic_re)\n", + "expanded_ct['Replaced_mode']= expanded_ct['replaced_mode'].map(dic_re)\n", + "\n", + "#Mapping fuel\n", + "expanded_ct['Mode_confirm_fuel']= expanded_ct['Mode_confirm'].map(dic_fuel)\n", + "expanded_ct['Replaced_mode_fuel']= expanded_ct['Replaced_mode'].map(dic_fuel)\n", + "expanded_ct.shape" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "9e27d0e9", + "metadata": {}, + "outputs": [], + "source": [ + "# Just a meters to miles conversion at this point\n", + "scaffolding.unit_conversions(expanded_ct)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5330285", + "metadata": {}, + "outputs": [], + "source": [ + "file_suffix = scaffolding.get_file_suffix(year, month, program)\n", + "quality_text = scaffolding.get_quality_text(participant_ct_df, expanded_ct)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bec31447", + "metadata": {}, + "outputs": [], + "source": [ + "expanded_ct['duration_h']" + ] + }, + { + "cell_type": "markdown", + "id": "e0420cf9", + "metadata": {}, + "source": [ + "### Analysis of Cost Impact" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9448046e", + "metadata": {}, + "outputs": [], + "source": [ + "expanded_ct = scaffolding.cost(expanded_ct, df_CT,'Replaced_mode', 'Mode_confirm')\n", + "expanded_ct = scaffolding.cost_impact(expanded_ct, 'distance_miles','Replaced_mode', 'Mode_confirm')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41460ce2", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "expanded_ct.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69f6ae3d", + "metadata": {}, + "outputs": [], + "source": [ + "data=expanded_ct.loc[(expanded_ct['distance_miles'] <= 40)].sort_values(by=['Cost_Impact($)'], ascending=False) \n", + "x='Cost_Impact($)'\n", + "y='distance_miles'\n", + "legend ='Mode_confirm'\n", + "plot_title=\"Sketch of Cost_Impact($) by Travel Mode Selected\\n%s\" % quality_text\n", + "file_name ='sketch_distance_cost_impact%s.png' % file_suffix\n", + "distancevsenergy(data,x,y,legend,plot_title,file_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84f69f84", + "metadata": {}, + "outputs": [], + "source": [ + "#eirp : energy impact replaced_mode\n", + 
"eirc=expanded_ct.groupby('Replaced_mode').agg({'Cost_Impact($)': ['sum', 'mean']},)\n", + "eirc.columns = ['Sketch of Total Cost_Impact($)', 'Sketch of Average Cost_Impact($)']\n", + "eirc = eirc.reset_index()\n", + "eirc = eirc.sort_values(by=['Sketch of Total Cost_Impact($)'], ascending=False)\n", + "eirc['boolean'] = eirc['Sketch of Total Cost_Impact($)'] > 0\n", + "\n", + "#eimc : energy impact mode_confirm\n", + "eimc=expanded_ct.groupby('Mode_confirm').agg({'Cost_Impact($)': ['sum', 'mean']},)\n", + "eimc.columns = ['Sketch of Total Cost_Impact($)', 'Sketch of Average Cost_Impact($)']\n", + "eimc = eimc.reset_index()\n", + "eimc = eimc.sort_values(by=['Sketch of Total Cost_Impact($)'], ascending=False)\n", + "\n", + "\n", + "subset1 = eirc [['Replaced_mode', 'Sketch of Total Cost_Impact($)']].copy()\n", + "subset1.rename(columns = {'Replaced_mode':'Transport Mode','Sketch of Total Cost_Impact($)':'Replaced_Mode' }, inplace=True)\n", + "\n", + "subset2 = eimc [['Mode_confirm', 'Sketch of Total Cost_Impact($)']].copy()\n", + "subset2.rename(columns = {'Mode_confirm':'Transport Mode','Sketch of Total Cost_Impact($)':'Mode_Confirm' }, inplace=True)\n", + "\n", + "df_plot = pd.merge(subset1, subset2, on=\"Transport Mode\")\n", + "df = pd.melt(df_plot , id_vars=['Transport Mode'], value_vars=['Replaced_Mode','Mode_Confirm'], var_name='selection')\n", + "df.rename(columns = {'value':'Cost Impact ($)'}, inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e5aba5a", + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df78180e", + "metadata": {}, + "outputs": [], + "source": [ + "df= df.sort_values(by=['Cost Impact ($)'], ascending=False)\n", + "x= 'Cost Impact ($)'\n", + "y= 'Transport Mode'\n", + "color = 'selection'\n", + "plot_title=\"Sketch of Cost Impact ($) by Transport Mode\\n%s\" % quality_text\n", + "file_name ='sketch_all_cost_impact%s.png' % 
file_suffix\n", + "overeall_energy_impact(x,y,color,df,plot_title,file_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5da0d49b", + "metadata": {}, + "outputs": [], + "source": [ + "net_cost_saved = round(sum(eirc['Sketch of Total Cost_Impact($)']), 2)\n", + "\n", + "x = eirc['Sketch of Total Cost_Impact($)']\n", + "y = eirc['Replaced_mode']\n", + "color =eirc['boolean']\n", + "\n", + "plot_title=\"Sketch of Cost Impact for all confirmed trips \\n Contribution by mode towards a total of %s ($) \\n%s\" % (net_cost_saved, quality_text)\n", + "file_name ='sketch_all_mode_cost_impact%s.png' % file_suffix\n", + "energy_impact(x,y,color,plot_title,file_name,'Cost_Impact($)')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bcff73a3", + "metadata": {}, + "outputs": [], + "source": [ + "data_eb = expanded_ct.query(\"Mode_confirm == 'Pilot ebike'\")\n", + "# ebei : ebike energy impact\n", + "ebei=data_eb.groupby('Replaced_mode').agg({'Cost_Impact($)': ['sum', 'mean']},)\n", + "ebei.columns = ['Sketch of Total Cost_Impact($)', 'Sketch of Average Cost_Impact($)']\n", + "ebei= ebei.reset_index()\n", + "ebei = ebei.sort_values(by=['Sketch of Total Cost_Impact($)'], ascending=False)\n", + "ebei['boolean'] = ebei['Sketch of Total Cost_Impact($)'] > 0\n", + "net_energy_saved = round(sum(ebei['Sketch of Total Cost_Impact($)']), 2)\n", + "\n", + "x = ebei['Sketch of Total Cost_Impact($)']\n", + "y = ebei['Replaced_mode']\n", + "color =ebei['boolean']\n", + "\n", + "plot_title=\"Sketch of Cost Impact of E-Bike trips\\n Contribution by replaced mode towards a total of %s ($)\\n %s\" % (net_energy_saved, quality_text)\n", + "file_name ='sketch_cost_impact_ebike%s.png' % file_suffix\n", + "energy_impact(x,y,color,plot_title,file_name,'Cost_Impact($)')" + ] + }, + { + "cell_type": "markdown", + "id": "e12ee241", + "metadata": {}, + "source": [ + "### Analysis of Time Impact" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "3065d606", + "metadata": {}, + "outputs": [], + "source": [ + "trash, dura = scaffolding.calc_avg_dura(expanded_ct, 'distance_miles', 'duration_h', 'Mode_confirm')\n", + "df_T = pd.DataFrame(dura)\n", + "df_T.reset_index(inplace=True)\n", + "df_T.rename(columns={'Mode_confirm':'mode','D(time/PMT)':'D(hours/PMT)'}, inplace=True)\n", + "df_T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10cad517", + "metadata": {}, + "outputs": [], + "source": [ + "expanded_ct = scaffolding.time(expanded_ct, df_T,'Replaced_mode', 'Mode_confirm')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62a3a89f", + "metadata": {}, + "outputs": [], + "source": [ + "expanded_ct.rename(columns={'dura__trip_Mode_confirm':'dura__trip_mode', 'dura__trip_Replaced_mode':'dura__trip_repm'}, inplace=True)\n", + "expanded_ct.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3c77e24", + "metadata": {}, + "outputs": [], + "source": [ + "expanded_ct = scaffolding.time_impact(expanded_ct, 'distance_miles','Replaced_mode', 'Mode_confirm')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74e5546d", + "metadata": {}, + "outputs": [], + "source": [ + "data=expanded_ct.loc[(expanded_ct['distance_miles'] <= 40)].sort_values(by=['Time_Impact(hours)'], ascending=False) \n", + "x='Time_Impact(hours)'\n", + "y='distance_miles'\n", + "legend ='Mode_confirm'\n", + "plot_title=\"Sketch of Time_Impact(hours) by Travel Mode Selected\\n%s\" % quality_text\n", + "file_name ='sketch_distance_time_impact%s.png' % file_suffix\n", + "distancevsenergy(data,x,y,legend,plot_title,file_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3125b5d9", + "metadata": {}, + "outputs": [], + "source": [ + "#eirp : energy impact replaced_mode\n", + "eirc=expanded_ct.groupby('Replaced_mode').agg({'Time_Impact(hours)': ['sum', 'mean']},)\n", + "eirc.columns = ['Sketch of Total 
Time_Impact(hours)', 'Sketch of Average Time_Impact(hours)']\n", + "eirc = eirc.reset_index()\n", + "eirc = eirc.sort_values(by=['Sketch of Total Time_Impact(hours)'], ascending=False)\n", + "eirc['boolean'] = eirc['Sketch of Total Time_Impact(hours)'] > 0\n", + "\n", + "#eimc : energy impact mode_confirm\n", + "eimc=expanded_ct.groupby('Mode_confirm').agg({'Time_Impact(hours)': ['sum', 'mean']},)\n", + "eimc.columns = ['Sketch of Total Time_Impact(hours)', 'Sketch of Average Time_Impact(hours)']\n", + "eimc = eimc.reset_index()\n", + "eimc = eimc.sort_values(by=['Sketch of Total Time_Impact(hours)'], ascending=False)\n", + "\n", + "\n", + "subset1 = eirc [['Replaced_mode', 'Sketch of Total Time_Impact(hours)']].copy()\n", + "subset1.rename(columns = {'Replaced_mode':'Transport Mode','Sketch of Total Time_Impact(hours)':'Replaced_Mode' }, inplace=True)\n", + "\n", + "subset2 = eimc [['Mode_confirm', 'Sketch of Total Time_Impact(hours)']].copy()\n", + "subset2.rename(columns = {'Mode_confirm':'Transport Mode','Sketch of Total Time_Impact(hours)':'Mode_Confirm' }, inplace=True)\n", + "\n", + "df_plot = pd.merge(subset1, subset2, on=\"Transport Mode\")\n", + "df = pd.melt(df_plot , id_vars=['Transport Mode'], value_vars=['Replaced_Mode','Mode_Confirm'], var_name='selection')\n", + "df.rename(columns = {'value':'Time_Impact(hours)'}, inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2769f502", + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db91fd02", + "metadata": {}, + "outputs": [], + "source": [ + "df= df.sort_values(by=['Time_Impact(hours)'], ascending=False)\n", + "x= 'Time_Impact(hours)'\n", + "y= 'Transport Mode'\n", + "color = 'selection'\n", + "plot_title=\"Sketch of Time_Impact(hours) by Transport Mode\\n%s\" % quality_text\n", + "file_name ='sketch_all_time_impact%s.png' % file_suffix\n", + 
"overeall_energy_impact(x,y,color,df,plot_title,file_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54e1b96c", + "metadata": {}, + "outputs": [], + "source": [ + "net_cost_saved = round(sum(eirc['Sketch of Total Time_Impact(hours)']), 2)\n", + "\n", + "x = eirc['Sketch of Total Time_Impact(hours)']\n", + "y = eirc['Replaced_mode']\n", + "color =eirc['boolean']\n", + "\n", + "plot_title=\"Sketch of Time Impact for all confirmed trips \\n Contribution by mode towards a total of %s (hours) \\n%s\" % (net_cost_saved, quality_text)\n", + "file_name ='sketch_all_mode_time_impact%s.png' % file_suffix\n", + "energy_impact(x,y,color,plot_title,file_name,'Time_Impact(hours)')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a900d383", + "metadata": {}, + "outputs": [], + "source": [ + "data_eb = expanded_ct.query(\"Mode_confirm == 'Pilot ebike'\")\n", + "# ebei : ebike energy impact\n", + "ebei=data_eb.groupby('Replaced_mode').agg({'Time_Impact(hours)': ['sum', 'mean']},)\n", + "ebei.columns = ['Sketch of Total Time_Impact(hours)', 'Sketch of Average Time_Impact(hours)']\n", + "ebei= ebei.reset_index()\n", + "ebei = ebei.sort_values(by=['Sketch of Total Time_Impact(hours)'], ascending=False)\n", + "ebei['boolean'] = ebei['Sketch of Total Time_Impact(hours)'] > 0\n", + "net_energy_saved = round(sum(ebei['Sketch of Total Time_Impact(hours)']), 2)\n", + "\n", + "x = ebei['Sketch of Total Time_Impact(hours)']\n", + "y = ebei['Replaced_mode']\n", + "color =ebei['boolean']\n", + "\n", + "plot_title=\"Sketch of Time Impact of E-Bike trips\\n Contribution by replaced mode towards a total of %s (hours)\\n %s\" % (net_energy_saved, quality_text)\n", + "file_name ='sketch_time_impact_ebike%s.png' % file_suffix\n", + "energy_impact(x,y,color,plot_title,file_name,'Time_Impact(hours)')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f72cf4eb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "bf000764", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d71c5bd", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/viz_scripts/energy_calculations.ipynb b/viz_scripts/energy_calculations.ipynb index 22868ca..ab7d112 100644 --- a/viz_scripts/energy_calculations.ipynb +++ b/viz_scripts/energy_calculations.ipynb @@ -434,6 +434,14 @@ "file_name ='sketch_CO2impact_ebike%s.png' % file_suffix\n", "CO2_impact(x,y,color,plot_title,file_name)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbbaed62", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/viz_scripts/mapping_dictionaries.ipynb b/viz_scripts/mapping_dictionaries.ipynb index e7c4264..694c5f9 100644 --- a/viz_scripts/mapping_dictionaries.ipynb +++ b/viz_scripts/mapping_dictionaries.ipynb @@ -20,6 +20,7 @@ "df_pur= pd.read_csv(r'auxiliary_files/purpose_labels.csv')\n", "df_re = pd.read_csv(r'auxiliary_files/mode_labels.csv')\n", "df_EI = pd.read_csv(r'auxiliary_files/energy_intensity.csv')\n", + "df_CT = pd.read_csv(r'auxiliary_files/cost.csv')\n", "\n", "#dictionaries:\n", "dic_pur = dict(zip(df_pur['purpose_confirm'],df_pur['bin_purpose'])) # bin purpose\n", @@ -35,10 +36,19 @@ "outputs": [], "source": [ "%store df_EI \n", + "%store df_CT\n", "%store dic_re \n", "%store dic_pur \n", "%store dic_fuel " ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a25d7e2", + "metadata": {}, + 
"outputs": [], + "source": [] } ], "metadata": { diff --git a/viz_scripts/plots.py b/viz_scripts/plots.py index e452bda..0798e2c 100644 --- a/viz_scripts/plots.py +++ b/viz_scripts/plots.py @@ -181,16 +181,16 @@ def overeall_energy_impact(x,y,color,data,plot_title,file_name): -def energy_impact(x,y,color,plot_title,file_name): +def energy_impact(x,y,color,plot_title,file_name,xl='Energy_Impact(kWH)'): color = color.map({True: 'green', False: 'red'}) - objects = ('Energy Savings', 'Energy Loss') + objects = ('Savings', 'Loss') y_labels = y plt.figure(figsize=(15, 8)) width = 0.8 ax = x.plot(kind='barh',width=width, color=color) ax.set_title(plot_title, fontsize=18) - ax.set_xlabel('Energy_Impact(kWH)', fontsize=18) + ax.set_xlabel(xl, fontsize=18) ax.set_ylabel('Replaced Mode',fontsize=18) ax.set_yticklabels(y_labels) ax.xaxis.set_tick_params(labelsize=15) diff --git a/viz_scripts/run_unit_tests.py b/viz_scripts/run_unit_tests.py new file mode 100644 index 0000000..a8c8840 --- /dev/null +++ b/viz_scripts/run_unit_tests.py @@ -0,0 +1,256 @@ +""" +Author: Stanley Y +Purpose: + To test functions in scaffolding + +Credit to: +https://docs.python.org/3.10/library/unittest.html +""" + +import unittest +import pandas as pd +import numpy as np +import scaffolding + +class TestEnergyIntensity(unittest.TestCase): + """ + A unit test for energy_intensity function in + the scaffolding.py file + """ + + def setUp(self): + self.constants = pd.DataFrame({ + 'mode': ['car', 'bus', 'train'], + 'vals': [12,5,2], + 'test': [0,0,0], + 'energy_intensity_factor': [0, 1, 2], + 'CO2_factor': [1, 2, 3], + '(kWH)/trip': [0.5, 0.2, 0.3], + 'C($/PMT)': [1,2,3], + 'D(hours/PMT)': [3,2,1] + }) + + self.data = pd.DataFrame({ + 'mode': ['car', 'bus', 'train', 'car'], + 'repm': ['car', 'car', 'bus', 'train'], + 'vals': [1,2,3, 4], + 'test': [0.5,3,0,8] + }) + + + def test_process(self): + expect = [('car', 12), ('bus', 5), ('train', 2)] + zipped = zip(self.constants['mode'], self.constants['vals']) 
+ listed = list(zipped) + self.assertEqual(expect, listed, + 'Zip malfunction') + + expect = { + 'car': 12, + 'bus': 5, + 'train': 2 + } + zipped = zip(self.constants['mode'], self.constants['vals']) + a_dict = dict(zipped) + self.assertEqual(expect, a_dict, + 'Dict malfunction') + + expect = pd.Series( + [12, 12, 5, 2] + ) + a_dict = dict(zip(self.constants['mode'], self.constants['vals'])) + output = self.data['repm'].map(a_dict) + self.assertTrue(expect.equals(output), + 'Map malfunction') + + + def test_function(self): + expect = pd.DataFrame({ + 'mode': ['car', 'bus', 'train', 'car'], + 'repm': ['car', 'car', 'bus', 'train'], + 'vals': [1,2,3, 4], + 'test': [0.5,3,0,8], + 'ei_mode': [0, 1, 2, 0], + 'ei_repm': [0, 0, 1, 2], + 'CO2_mode': [1, 2, 3, 1], + 'CO2_repm': [1, 1, 2, 3], + 'ei_trip_mode': [0.5, 0.2, 0.3, 0.5], + 'ei_trip_repm': [0.5, 0.5, 0.2, 0.3], + }) + output = scaffolding.energy_intensity(self.data, self.constants, '', 'repm', 'mode') + self.assertTrue(expect.equals(output), + f"energy_intensity failed:\n{output[['ei_mode','ei_repm','CO2_mode','CO2_repm','ei_trip_mode','ei_trip_repm']]}") + + expect = pd.DataFrame({ + 'mode': ['car', 'bus', 'train', 'car'], + 'repm': ['car', 'car', 'bus', 'train'], + 'vals': [1,2,3, 4], + 'test': [0.5,3.0,0.0,8.0], + 'cost__trip_mode': [1,2,3,1], + 'cost__trip_repm': [1,1,2,3], + }) + output = scaffolding.cost(self.data, self.constants, 'repm', 'mode') + self.assertTrue(expect.equals(output), + f"cost failed:\n{output}") + + expect = pd.DataFrame({ + 'mode': ['car', 'bus', 'train', 'car'], + 'repm': ['car', 'car', 'bus', 'train'], + 'vals': [1,2,3, 4], + 'test': [0.5,3,0,8], + 'dura__trip_mode': [3,2,1,3], + 'dura__trip_repm': [3,3,2,1], + }) + output = scaffolding.time(self.data, self.constants, 'repm', 'mode') + self.assertTrue(expect.equals(output), + f"time failed:\n{output}") + + +class TestEnergyImpact(unittest.TestCase): + """ + A unit test for energy_impact_kWH function in + the scaffolding.py file + """ + 
+ def setUp(self): + self.conditions = np.array([ + [True, False, False], + [False,True,False], + [False,False,True] + ]) + self.values = np.array([ + [8, 0, 3], + [3,5,7], + [4,2,9] + ]) + self.data = pd.DataFrame({ + 'mode': ['car', 'bus', 'train', 'car'], + 'repm': ['car', 'car', 'bus', 'train'], + 'dist': [1.5,2.5,3.5,4.5], + 'ei_mode': [1,2,3,1], + 'ei_repm': [1,1,2,3], + 'ei_trip_mode': [7,8,9,7], + 'ei_trip_repm': [7,7,8,9], + 'Mode_confirm_fuel': ['gasoline','diesel','electric','gasoline'], + 'Replaced_mode_fuel': ['gasoline','gasoline','diesel','electric'] + }) + + + def test_process(self): + expect = np.array([8, 5, 9]) + output = np.select(self.conditions, self.values) + if(len(expect) != len(output)): + self.assertTrue(False, + f'Select Malfunction (out: {output})') + else: + for i in range(len(expect)): + self.assertEqual(expect[i], output[i], + f'Select Malfunction (out: {output})') + + def test_function(self): + expect = pd.DataFrame({ + 'mode': ['car', 'bus', 'train', 'car'], + 'repm': ['car', 'car', 'bus', 'train'], + 'dist': [1.5,2.5,3.5,4.5], + 'ei_mode': [1,2,3,1], + 'ei_repm': [1,1,2,3], + 'ei_trip_mode': [7,8,9,7], + 'ei_trip_repm': [7,7,8,9], + 'Mode_confirm_fuel': ['gasoline','diesel','electric','gasoline'], + 'Replaced_mode_fuel': ['gasoline','gasoline','diesel','electric'], + 'repm_EI(kWH)':[1.5*1*0.000293071, + 2.5*1*0.000293071, + 3.5*2*0.000293071, + 4.5*3+9], + 'mode_EI(kWH)':[1.5*1*0.000293071, + 2.5*2*0.000293071, + 3.5*3+9, + 4.5*1*0.000293071], + 'Energy_Impact(kWH)':[round(1.5*1*0.000293071-1.5*1*0.000293071,3), + round(2.5*1*0.000293071-2.5*2*0.000293071,3), + round(3.5*2*0.000293071-(3.5*3+9),3), + round(4.5*3+9-4.5*1*0.000293071,3)] + }) + output = scaffolding.energy_impact_kWH(self.data,'dist','repm', 'mode') + self.assertTrue(np.isclose(expect['Energy_Impact(kWH)'], + output['Energy_Impact(kWH)']).all(), + f'Error in function') + + +class TestCalcAvgDura(unittest.TestCase): + """ + A unit test for calc_avg_dura function in + 
the scaffolding.py file + """ + + def setUp(self): + self.data = pd.DataFrame({ + 'mode': ['car', 'bus', 'train', 'car'], + 'dist': [1,2,3,4], + 'time': [1,2,3,4] + }) + + + def test_process(self): + # Average speed of each trip + expect = pd.Series([1.0,1.0,1.0,1.0]) + speeds = self.data['dist'] / self.data['time'] + self.assertTrue(expect.equals(speeds), + f'Calc speed failed.\n{expect}\n{speeds}') + + # Aggregate by mode + self.data['sped'] = self.data['dist'] / self.data['time'] + expect = pd.Series({ + 'bus': 1.0, + 'car': 1.0, + 'train': 1.0 + }) + groupd = self.data.groupby('mode') + speedm = groupd['sped'].mean() + self.assertTrue(expect.equals(speedm), + f'Agg by mean failed.\n{expect}\n{speedm}') + + + speedm = groupd['sped'].median() + self.assertTrue(expect.equals(speedm), + f'Agg by median failed.\n{expect}\n{speedm}') + + + None + + + def test_function(self): + expect1 = pd.DataFrame({ + 'mode': ['car', 'bus', 'train', 'car'], + 'dist': [1,2,3,4], + 'time': [1,2,3,4], + 'D(time/PMT)': [1.0, 1.0, 1.0, 1.0] + }) + expect2 = pd.Series( + data = [1.0, 1.0, 1.0], + index = ['bus', 'car', 'train'], + name = 'D(time/PMT)', + dtype=np.float64 + ) + result1, result2 = scaffolding.calc_avg_dura(self.data,'dist','time','mode','average') + self.assertTrue(expect1.equals(result1), + f'calc_avg_dura with average failed.[1]\n{result1}') + self.assertTrue(expect2.equals(result2), + f'calc_avg_dura with average failed.[2]\n{expect2}\n{result2}') + + result1, result2 = scaffolding.calc_avg_dura(self.data,'dist','time','mode','median') + self.assertTrue(expect1.equals(result1), + f'calc_avg_dura with median failed.[1]') + self.assertTrue(expect2.equals(result2), + f'calc_avg_dura with median failed.[2]') + + expect2 = None + result1, result2 = scaffolding.calc_avg_dura(self.data,'dist','time','mode','break') + self.assertTrue(expect1.equals(result1), + f'calc_avg_dura with incorrect method failed.[1]') + self.assertEqual(expect2, result2, + f'calc_avg_dura with 
def unit_conversions(df):
    """Add derived distance and duration columns to ``df`` in place.

    Reads ``df['distance']`` and ``df['duration']`` and writes
    ``df['distance_miles']`` and ``df['duration_h']``.  Nothing is returned;
    the caller's dataframe is modified.
    """
    df['distance_miles'] = df["distance"] * 0.00062  # meters to miles
    df['duration_h'] = df['duration'] / 60 / 60  # seconds to hours


def feat_eng(data, const, feats, prefs, mode='Mode_confirm', repm='Replaced_mode'):
    """Look up per-mode constants and append them to a copy of ``data``.

    For every ``(feature, prefix)`` pair, two columns are created:
    ``prefix + mode`` and ``prefix + repm``.  Each holds the value of
    ``feature`` from ``const`` for the mode named in the corresponding
    ``data`` column.  Modes that do not appear in ``const['mode']`` map
    to NaN.

    Parameters:
        data  - pandas DataFrame of trips
        const - pandas DataFrame of constants, one row per mode
                (requires a 'mode' column)
        feats - list of column names in ``const`` to look up
        prefs - list of prefixes, one per entry in ``feats``, used to
                name the new columns
        mode  - name of the column in ``data`` holding the confirmed mode
        repm  - name of the column in ``data`` holding the replaced mode

    Returns:
        A copy of ``data`` with the new columns appended.  If the inputs
        fail validation, an error is printed and the original ``data``
        object is returned unchanged (best-effort behaviour, no exception).
    """

    # Check that const has a mode feature
    if 'mode' not in const.columns:
        print('Error: mode not in constants dataframe.')
        return data

    # Check features list and prefix list same length
    if len(feats) != len(prefs):
        print("Error: prefix list and feature list not the same length.")
        return data

    # Check all feature names in constants dataframe
    for feat in feats:
        if feat not in const.columns:
            print('Error: ' + feat + ' not in constants dataframe.')
            return data

    # Work on a copy so the caller's dataframe is left untouched.
    data = data.copy()

    for feat, pref in zip(feats, prefs):
        # Key the lookup on const['mode'] directly.  Copying that column
        # under the data's column names first would overwrite a constants
        # feature whenever the two names collide.
        lookup = dict(zip(const['mode'], const[feat]))

        # Column order (confirmed mode first, then replaced mode) is kept
        # stable because callers compare whole dataframes.
        for col in (mode, repm):
            fn = pref + col
            data[fn] = data[col].map(lookup)
            print('Created ' + fn + ' feature in data.')

    return data


def energy_intensity(df, df1, distance, col1, col2):
    """Append energy and CO2 factors for the confirmed and replaced modes.

    Inputs:
        df       = dataframe with trip data
        df1      = dataframe with energy factors; must provide the columns
                   'mode', 'energy_intensity_factor', 'CO2_factor' and
                   '(kWH)/trip'
        distance = not used by this function; kept so existing callers
                   keep working
        col1     = name of the replaced-mode column (e.g. Replaced_mode)
        col2     = name of the confirmed-mode column (e.g. Mode_confirm)

    Returns:
        A copy of ``df`` with 'ei_', 'CO2_' and 'ei_trip_' columns added
        for both ``col2`` and ``col1`` (see ``feat_eng``).
    """
    return feat_eng(
        df,
        df1,
        ['energy_intensity_factor', 'CO2_factor', '(kWH)/trip'],
        ['ei_', 'CO2_', 'ei_trip_'],
        col2,
        col1
    )


def cost(data, cost, repm, mode):
    """Append the per-mile cost rate of the confirmed and replaced modes.

    Parameters:
        data - dataframe with trip data
        cost - dataframe with a 'mode' column and a 'C($/PMT)' column
        repm - name of the column in ``data`` holding the replaced mode
        mode - name of the column in ``data`` holding the confirmed mode

    Returns:
        A copy of ``data`` with 'cost__trip_<mode>' and
        'cost__trip_<repm>' columns holding the 'C($/PMT)' value of each
        trip's mode.

    NOTE: only 'C($/PMT)' is read.  The '($)/trip' column that
    auxiliary_files/cost.csv also carries is not used here.
    """
    return feat_eng(
        data,
        cost,
        ['C($/PMT)'],
        ['cost__trip_'],
        mode,
        repm
    )
def time(data, dura, repm, mode):
    """Append the per-mile duration rate of the confirmed and replaced modes.

    Parameters:
        data - dataframe with participant trip data
        dura - dataframe with a 'mode' column and a 'D(hours/PMT)' column
        repm - name of the column in ``data`` holding the replaced mode
        mode - name of the column in ``data`` holding the confirmed mode

    Returns:
        The result of ``feat_eng``: ``data`` with 'dura__trip_<mode>' and
        'dura__trip_<repm>' columns holding the 'D(hours/PMT)' value of
        each trip's mode.
    """
    return feat_eng(
        data,
        dura,
        ['D(hours/PMT)'],
        ['dura__trip_'],
        mode,
        repm
    )


def cost_impact(data, dist, repm, mode):
    """Compute the cost difference between replaced and confirmed mode.

    Expects the 'cost__trip_<mode>' and 'cost__trip_<repm>' columns that
    ``cost`` creates.  ``data`` is modified in place and also returned.

    Parameters:
        data - dataframe with participant trip data
        dist - name of the column in ``data`` with the trip distance
        repm - name of the column in ``data`` holding the replaced mode
        mode - name of the column in ``data`` holding the confirmed mode

    Returns:
        data with '<mode>_cost', '<repm>_cost' and 'Cost_Impact($)' added;
        the impact is replaced-mode cost minus confirmed-mode cost,
        rounded to 2 decimals.
    """
    data[mode + '_cost'] = data[dist] * data['cost__trip_' + mode]
    data[repm + '_cost'] = data[dist] * data['cost__trip_' + repm]
    data['Cost_Impact($)'] = (data[repm + '_cost'] - data[mode + '_cost']).round(2)

    return data


def _per_mile_column(data, derived, legacy):
    """Return ``derived`` if it is a column of ``data``, otherwise ``legacy``."""
    return derived if derived in data.columns else legacy


def time_impact(data, dist, repm, mode):
    """Compute the time difference between replaced and confirmed mode.

    The per-mile duration columns are looked up as 'dura__trip_<mode>' and
    'dura__trip_<repm>', i.e. the names ``time`` generates, mirroring
    ``cost_impact``.  For callers that renamed those columns, the fixed
    names 'dura__trip_mode' and 'dura__trip_repm' are still accepted as a
    fallback.  ``data`` is modified in place and also returned.

    Parameters:
        data - dataframe with participant trip data
        dist - name of the column in ``data`` with the trip distance
        repm - name of the column in ``data`` holding the replaced mode
        mode - name of the column in ``data`` holding the confirmed mode

    Returns:
        data with '<mode>_dura', '<repm>_dura' and 'Time_Impact(hours)'
        added; the impact is replaced-mode duration minus confirmed-mode
        duration, rounded to 3 decimals.

    Raises:
        KeyError if neither the derived nor the fallback column exists.
    """
    mode_rate = _per_mile_column(data, 'dura__trip_' + mode, 'dura__trip_mode')
    repm_rate = _per_mile_column(data, 'dura__trip_' + repm, 'dura__trip_repm')

    data[mode + '_dura'] = data[dist] * data[mode_rate]
    data[repm + '_dura'] = data[dist] * data[repm_rate]
    data['Time_Impact(hours)'] = (data[repm + '_dura'] - data[mode + '_dura']).round(3)

    return data


def calc_avg_dura(data, dist, time, mode, meth='average'):
    """Compute duration per unit distance for each trip and per mode.

    Parameters:
        data - dataframe with participant trip data
        dist - name of the column in ``data`` with the trip distance
        time - name of the column in ``data`` with the trip duration
        mode - name of the column in ``data`` to group by
        meth - aggregation applied within each group:
               'average' (mean) or 'median'

    Process:
        Compute ``time / dist`` for every trip ('D(time/PMT)')
        Aggregate that ratio for each mode

    Returns:
        data - copy of the input with a 'D(time/PMT)' column
        mdur - pandas Series of the aggregated ratio indexed by mode, or
               None (after printing a message) when ``meth`` is not
               recognised
    """
    data = data.copy()

    # A zero distance has no defined duration-per-distance.  Mask it to NaN
    # so the ratio is NaN (skipped by mean/median) instead of inf, which
    # would otherwise turn the whole mode's average into inf.
    distance = data[dist].where(data[dist] != 0)
    data['D(time/PMT)'] = data[time] / distance

    grouped = data.groupby(mode)['D(time/PMT)']

    if meth == 'average':
        return data, grouped.mean()
    if meth == 'median':
        return data, grouped.median()

    print(f'Method invalid: {meth}.')
    return data, None