-
Notifications
You must be signed in to change notification settings - Fork 2
/
app.py
232 lines (200 loc) · 11.5 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# Importing libraries-----------------------------------------------------------------------------------------
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from scipy import stats
from sklearn.ensemble import RandomForestRegressor
# Creating Sidebar-------------------------------------------------------------------------------------------
# The value picked here decides which page the rest of the script renders.
with st.sidebar:
    st.markdown("# CO2 Emissions by Vehicle")
    user_input = st.selectbox('Please select', ('Visulization', 'Model'))

# Load the vehicle dataset
df = pd.read_csv('co2 Emissions.csv')

# Replace the single-letter fuel codes with readable names.
# Codes missing from the mapping become NaN.
fuel_type_mapping = {
    "Z": "Premium Gasoline",
    "X": "Regular Gasoline",
    "D": "Diesel",
    "E": "Ethanol(E85)",
    "N": "Natural Gas",
}
df["Fuel Type"] = df["Fuel Type"].map(fuel_type_mapping)

# Drop rows with natural gas as fuel type.
# An exact comparison is used rather than a regex `str.contains`: it states the
# intent directly and does not fail on NaN fuel types (such rows are kept).
df_natural = df[df["Fuel Type"] != "Natural Gas"].reset_index(drop=True)

# Remove outliers from the data: a row is kept only when the absolute z-score
# of every selected column is below 1.9.
df_new = df_natural[['Engine Size(L)', 'Cylinders', 'Fuel Consumption Comb (L/100 km)', 'CO2 Emissions(g/km)']]
df_new_model = df_new[(np.abs(stats.zscore(df_new)) < 1.9).all(axis=1)]
# Visualization-------------------------------------------------------------------------------------------------
def _count_table(frame, column):
    """Return one row per distinct value of ``column`` with its frequency in ``Count``."""
    # Both output columns are named explicitly so the result does not depend on
    # the label pandas gives to value_counts() output.
    return frame[column].value_counts().rename_axis(column).reset_index(name='Count')


def _mean_co2_by(frame, column):
    """Return the mean CO2 emissions per distinct value of ``column``, sorted ascending."""
    return frame.groupby([column])['CO2 Emissions(g/km)'].mean().sort_values().reset_index()


def _bar_chart(data, x, y, *, title, xlabel, ylabel, figsize, rotation=None, **label_style):
    """Draw a value-labelled seaborn bar chart on a new figure and return that figure.

    ``rotation`` rotates the x tick labels when given; ``label_style`` is
    forwarded to ``Axes.bar_label`` (e.g. ``fontsize``, ``fmt``).
    """
    fig, ax = plt.subplots(figsize=figsize)
    sns.barplot(data=data, x=x, y=y, ax=ax)
    if rotation is not None:
        ax.tick_params(axis='x', labelrotation=rotation)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    if ax.containers:  # an empty table draws no bars, so there is nothing to label
        ax.bar_label(ax.containers[0], **label_style)
    return fig


def _boxplot_grid(frame, columns):
    """Return a 2x2 figure with one box plot per entry of ``columns``."""
    fig, axes = plt.subplots(2, 2, figsize=(20, 10))
    for ax, column in zip(axes.flat, columns):
        ax.boxplot(frame[column])
        ax.set_title(column)
    return fig


def _show(fig):
    """Render ``fig`` on the page, then close it."""
    # The figure is always passed explicitly, so no pyplot global-use
    # deprecation override is needed.
    st.pyplot(fig)
    # pyplot keeps every figure it creates until it is closed.
    plt.close(fig)


def plot_bar(data, x_label, y_label, title):
    """Build the standard wide bar chart (axis labels taken from the column names).

    Returns the figure so the caller can hand it to ``st.pyplot``.
    """
    return _bar_chart(
        data, x_label, y_label,
        title=title, xlabel=x_label, ylabel=y_label,
        figsize=(23, 5), rotation=90, fontsize=9,
    )


# The literal below must match the option string offered by the sidebar select box.
if user_input == 'Visulization':
    # Showing Dataset------------------------------------------------------------------------------------------
    st.title('CO2 Emissions by Vehicle')
    st.header("Data We collected from the source")
    st.write(df)

    # Brands of Cars-------------------------------------------------------------------------------------------
    st.subheader('Brands of Cars')
    df_brand = _count_table(df, 'Make')
    _show(_bar_chart(
        df_brand, "Make", "Count",
        title="All Car Companies and their Cars", xlabel="Companies", ylabel="Cars",
        figsize=(15, 6), rotation=75, fontsize=7,
    ))
    st.write(df_brand)

    # Top 25 Models of Cars------------------------------------------------------------------------------------
    st.subheader('Top 25 Models of Cars')
    df_model = _count_table(df, 'Model')
    # Only the 25 most frequent models are charted; the full table is shown below the chart.
    _show(_bar_chart(
        df_model[:25], "Model", "Count",
        title="Top 25 Car Models", xlabel="Models", ylabel="Cars",
        figsize=(20, 6), rotation=75,
    ))
    st.write(df_model)

    # Vehicle Class--------------------------------------------------------------------------------------------
    st.subheader('Vehicle Class')
    df_vehicle_class = _count_table(df, 'Vehicle Class')
    _show(_bar_chart(
        df_vehicle_class, "Vehicle Class", "Count",
        title="All Vehicle Class", xlabel="Vehicle Class", ylabel="Cars",
        figsize=(20, 5), rotation=75,
    ))
    st.write(df_vehicle_class)

    # Engine Sizes of Cars-------------------------------------------------------------------------------------
    st.subheader('Engine Sizes of Cars')
    df_engine_size = _count_table(df, 'Engine Size(L)')
    _show(_bar_chart(
        df_engine_size, "Engine Size(L)", "Count",
        title="All Engine Sizes", xlabel="Engine Size(L)", ylabel="Cars",
        figsize=(20, 6), rotation=90,
    ))
    st.write(df_engine_size)

    # Cylinders------------------------------------------------------------------------------------------------
    st.subheader('Cylinders')
    df_cylinders = _count_table(df, 'Cylinders')
    _show(_bar_chart(
        df_cylinders, "Cylinders", "Count",
        title="All Cylinders", xlabel="Cylinders", ylabel="Cars",
        figsize=(20, 6), rotation=90,
    ))
    st.write(df_cylinders)

    # Transmission of Cars-------------------------------------------------------------------------------------
    # Collapse the individual gearbox codes into five transmission families.
    # This happens after the raw dataset was displayed above, so that table
    # still shows the original codes.
    transmission_mapping = {
        "A4": "Automatic", "A5": "Automatic", "A6": "Automatic", "A7": "Automatic",
        "A8": "Automatic", "A9": "Automatic", "A10": "Automatic",
        "AM5": "Automated Manual", "AM6": "Automated Manual", "AM7": "Automated Manual",
        "AM8": "Automated Manual", "AM9": "Automated Manual",
        "AS4": "Automatic with Select Shift", "AS5": "Automatic with Select Shift",
        "AS6": "Automatic with Select Shift", "AS7": "Automatic with Select Shift",
        "AS8": "Automatic with Select Shift", "AS9": "Automatic with Select Shift",
        "AS10": "Automatic with Select Shift",
        "AV": "Continuously Variable", "AV6": "Continuously Variable",
        "AV7": "Continuously Variable", "AV8": "Continuously Variable",
        "AV10": "Continuously Variable",
        "M5": "Manual", "M6": "Manual", "M7": "Manual",
    }
    df["Transmission"] = df["Transmission"].map(transmission_mapping)
    st.subheader('Transmission')
    df_transmission = _count_table(df, 'Transmission')
    _show(_bar_chart(
        df_transmission, "Transmission", "Count",
        title="All Transmissions", xlabel="Transmissions", ylabel="Cars",
        figsize=(20, 5),
    ))
    st.write(df_transmission)

    # Fuel Type of Cars----------------------------------------------------------------------------------------
    st.subheader('Fuel Type')
    df_fuel_type = _count_table(df, 'Fuel Type')
    _show(_bar_chart(
        df_fuel_type, "Fuel Type", "Count",
        title="All Fuel Types", xlabel="Fuel Types", ylabel="Cars",
        figsize=(20, 5),
    ))
    st.text("We have only one data on natural gas. So we cannot predict anything using only one data. That's why we have to drop this row.")
    st.write(df_fuel_type)

    # Removing Natural Gas-------------------------------------------------------------------------------------
    st.subheader('After removing Natural Gas data')
    df_ftype = _count_table(df_natural, 'Fuel Type')
    _show(_bar_chart(
        df_ftype, "Fuel Type", "Count",
        title="All Fuel Types", xlabel="Fuel Types", ylabel="Cars",
        figsize=(20, 5),
    ))
    st.write(df_ftype)

    # CO2 Emission variation with Brand------------------------------------------------------------------------
    st.header('Variation in CO2 emissions with different features')
    st.subheader('CO2 Emission with Brand ')
    df_co2_make = _mean_co2_by(df, 'Make')
    _show(_bar_chart(
        df_co2_make, "Make", "CO2 Emissions(g/km)",
        title="CO2 Emissions variation with Brand", xlabel="Brands", ylabel="CO2 Emissions(g/km)",
        figsize=(20, 5), rotation=90, fontsize=8, fmt='%.1f',
    ))

    # CO2 Emissions variation with Vehicle Class---------------------------------------------------------------
    st.subheader('CO2 Emissions variation with Vehicle Class')
    df_co2_vehicle_class = _mean_co2_by(df, 'Vehicle Class')
    _show(plot_bar(df_co2_vehicle_class, "Vehicle Class", "CO2 Emissions(g/km)", "CO2 Emissions variation with Vehicle Class"))

    # CO2 Emission variation with Transmission-----------------------------------------------------------------
    st.subheader('CO2 Emission variation with Transmission')
    df_co2_transmission = _mean_co2_by(df, 'Transmission')
    _show(plot_bar(df_co2_transmission, "Transmission", "CO2 Emissions(g/km)", "CO2 Emission variation with Transmission"))

    # CO2 Emissions variation with Fuel Type-------------------------------------------------------------------
    st.subheader('CO2 Emissions variation with Fuel Type')
    df_co2_fuel_type = _mean_co2_by(df, 'Fuel Type')
    _show(plot_bar(df_co2_fuel_type, "Fuel Type", "CO2 Emissions(g/km)", "CO2 Emissions variation with Fuel Type"))

    # Box Plots------------------------------------------------------------------------------------------------
    st.header("Box Plots")
    features = ['Engine Size(L)', 'Cylinders', 'Fuel Consumption Comb (L/100 km)', 'CO2 Emissions(g/km)']
    _show(_boxplot_grid(df_new, features))

    # Outliers-------------------------------------------------------------------------------------------------
    st.text("As we can see there are some outliers present in our Dataset")
    st.subheader("After removing outliers")
    # df_new is the table the outlier filter was applied to, so it is the
    # correct "before" count (df still contains the natural-gas row).
    st.write("Before removing outliers we have", len(df_new), "data")
    st.write("After removing outliers we have", len(df_new_model), "data")

    # Boxplot after removing outliers--------------------------------------------------------------------------
    st.subheader("Boxplot after removing outliers")
    _show(_boxplot_grid(df_new_model, features))
else:
# Prepare the data for modeling--------------------------------------------------------------------
X = df_new_model[['Engine Size(L)', 'Cylinders', 'Fuel Consumption Comb (L/100 km)']]
y = df_new_model['CO2 Emissions(g/km)']
# Train the random forest regression model---------------------------------------------------------
model = RandomForestRegressor().fit(X, y)
# Create the Streamlit web app---------------------------------------------------------------------
st.title('CO2 Emission Prediction')
st.write('Enter the vehicle specifications to predict CO2 emissions.')
# Input fields for user----------------------------------------------------------------------------
engine_size = st.number_input('Engine Size(L)', step=0.1, format="%.1f")
cylinders = st.number_input('Cylinders', min_value=2, max_value=16, step=1)
fuel_consumption = st.number_input('Fuel Consumption Comb (L/100 km)', step=0.1, format="%.1f")
# Predict CO2 emissions----------------------------------------------------------------------------
input_data = [[cylinders, engine_size, fuel_consumption]]
predicted_co2 = model.predict(input_data)
# Display the prediction---------------------------------------------------------------------------
st.write(f'Predicted CO2 Emissions: {predicted_co2[0]:.2f} g/km')