-
Notifications
You must be signed in to change notification settings - Fork 43
/
Copy pathrebuilding_features.py
184 lines (148 loc) · 7.18 KB
/
rebuilding_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import numpy as np
import pandas as pd
import pickle
from os.path import join
# TODO consolidate constants & filepaths throughout codebase
DATA_DIR = join("data")
def load_batches_to_dict(amount_to_load=3):
    """Load battery-cycling batches from disk and merge them into one dict.

    Parameters
    ----------
    amount_to_load : int, optional
        Number of batches to load, starting from batch 1. Valid range: 1-3.

    Returns
    -------
    dict
        Mapping of cell key (e.g. 'b1c0') to that cell's data dict.

    Raises
    ------
    ValueError
        If amount_to_load is outside the range 1-3.
    """
    if amount_to_load < 1 or amount_to_load > 3:
        # Raising a bare string (as before) is itself a TypeError in Python 3;
        # ValueError is the correct signal for an out-of-range argument.
        raise ValueError("amount_to_load is not a valid number! Try a number between 1 and 3.")

    batches_dict = {}  # Initializing

    # Replicating Load Data logic
    print("Loading batch1 ...")
    path1 = join(DATA_DIR, "batch1.pkl")
    with open(path1, 'rb') as f:  # context manager guarantees the file is closed
        batch1 = pickle.load(f)
    # Remove batteries that do not reach 80% capacity.
    for key in ('b1c8', 'b1c10', 'b1c12', 'b1c13', 'b1c22'):
        del batch1[key]
    batches_dict.update(batch1)

    if amount_to_load > 1:
        print("Loading batch2 ...")
        path2 = join(DATA_DIR, "batch2.pkl")
        with open(path2, 'rb') as f:
            batch2 = pickle.load(f)
        # Five cells from batch1 carried over into batch2; remove their data
        # from batch2 and append it to the matching batch1 cell.
        batch2_keys = ['b2c7', 'b2c8', 'b2c9', 'b2c15', 'b2c16']
        batch1_keys = ['b1c0', 'b1c1', 'b1c2', 'b1c3', 'b1c4']
        add_len = [662, 981, 1060, 208, 482]
        for i, bk in enumerate(batch1_keys):
            b2_cell = batch2[batch2_keys[i]]
            batch1[bk]['cycle_life'] = batch1[bk]['cycle_life'] + add_len[i]
            for j in batch1[bk]['summary'].keys():
                if j == 'cycle':
                    # Shift batch2 cycle numbers so they continue batch1's count.
                    batch1[bk]['summary'][j] = np.hstack((
                        batch1[bk]['summary'][j],
                        b2_cell['summary'][j] + len(batch1[bk]['summary'][j])))
                else:
                    batch1[bk]['summary'][j] = np.hstack((
                        batch1[bk]['summary'][j], b2_cell['summary'][j]))
            last_cycle = len(batch1[bk]['cycles'].keys())
            for j, jk in enumerate(b2_cell['cycles'].keys()):
                batch1[bk]['cycles'][str(last_cycle + j)] = b2_cell['cycles'][jk]
        for key in ('b2c7', 'b2c8', 'b2c9', 'b2c15', 'b2c16'):
            del batch2[key]
        # All keys have to be updated after the reordering.
        batches_dict.update(batch1)
        batches_dict.update(batch2)

    if amount_to_load > 2:
        print("Loading batch3 ...")
        path3 = join(DATA_DIR, "batch3.pkl")
        with open(path3, 'rb') as f:
            batch3 = pickle.load(f)
        # Remove noisy channels from batch3.
        for key in ('b3c37', 'b3c2', 'b3c23', 'b3c32', 'b3c38', 'b3c39'):
            del batch3[key]
        batches_dict.update(batch3)

    print("Done loading batches")
    return batches_dict
def build_feature_df(batch_dict):
    """Return a pandas DataFrame with all originally used features of a loaded batch dict.

    Parameters
    ----------
    batch_dict : dict
        Mapping of cell key to cell data, as produced by load_batches_to_dict.
        Each cell is expected to provide 'cycle_life', per-cycle 'Qdlin'
        arrays under cell['cycles'], and summary arrays ('QD', 'cycle',
        'chargetime', 'IR') covering at least cycles 1-101 — verify against
        the pickled data layout.

    Returns
    -------
    pandas.DataFrame
        One row per cell: the regression features, the regression target
        'cycle_life' and the classification target 'cycle_550_clf'.
    """
    print("Start building features ...")
    from scipy.stats import skew, kurtosis

    n_cells = len(batch_dict)

    ## Initializing feature vectors:
    cycle_life = np.zeros(n_cells)
    # 1. delta_Q_100_10(V): statistics of the Q(V) difference, cycle 100 minus cycle 10
    minimum_dQ_100_10 = np.zeros(n_cells)
    variance_dQ_100_10 = np.zeros(n_cells)
    skewness_dQ_100_10 = np.zeros(n_cells)
    kurtosis_dQ_100_10 = np.zeros(n_cells)
    # 2. Discharge capacity fade curve features
    slope_lin_fit_2_100 = np.zeros(n_cells)      # Slope of the linear fit to the capacity fade curve, cycles 2 to 100
    intercept_lin_fit_2_100 = np.zeros(n_cells)  # Intercept of the linear fit to the capacity fade curve, cycles 2 to 100
    discharge_capacity_2 = np.zeros(n_cells)     # Discharge capacity, cycle 2
    diff_discharge_capacity_max_2 = np.zeros(n_cells)  # Difference between max discharge capacity and cycle 2
    # 3. Other features
    mean_charge_time_2_6 = np.zeros(n_cells)     # Average charge time, cycles 2 to 6
    minimum_IR_2_100 = np.zeros(n_cells)         # Minimum internal resistance, cycles 2 to 100
    # NOTE: the temperature-integral feature of the original feature set is skipped here.
    diff_IR_100_2 = np.zeros(n_cells)            # Internal resistance, difference between cycle 100 and cycle 2
    # Classifier features
    minimum_dQ_5_4 = np.zeros(n_cells)
    variance_dQ_5_4 = np.zeros(n_cells)
    cycle_550_clf = np.zeros(n_cells)            # 1.0 if the cell lasts at least 550 cycles

    for i, cell in enumerate(batch_dict.values()):
        # cycle_life may be stored as a size-1 array in the pickled data;
        # np.ravel(...)[0] yields a plain scalar either way (assigning a
        # size-1 ndarray element-wise is deprecated in modern numpy).
        life = np.ravel(cell['cycle_life'])[0]
        cycle_life[i] = life

        # 1. delta_Q_100_10(V)
        c10 = cell['cycles']['10']
        c100 = cell['cycles']['100']
        dQ_100_10 = c100['Qdlin'] - c10['Qdlin']
        minimum_dQ_100_10[i] = np.log(np.abs(np.min(dQ_100_10)))
        variance_dQ_100_10[i] = np.log(np.var(dQ_100_10))
        skewness_dQ_100_10[i] = np.log(np.abs(skew(dQ_100_10)))
        kurtosis_dQ_100_10[i] = np.log(np.abs(kurtosis(dQ_100_10)))

        # 2. Discharge capacity fade curve features.
        # np.polyfit(deg=1) replaces sklearn's LinearRegression: it fits the
        # same least-squares line, returns plain scalars (sklearn's coef_ /
        # intercept_ are size-1 arrays) and drops a heavyweight dependency.
        q = cell['summary']['QD'][1:100]     # discharge capacities, cycles 2 to 100
        x = cell['summary']['cycle'][1:100]  # cycle index from 2 to 100
        slope, intercept = np.polyfit(x, q, 1)
        slope_lin_fit_2_100[i] = slope
        intercept_lin_fit_2_100[i] = intercept
        discharge_capacity_2[i] = q[0]
        diff_discharge_capacity_max_2[i] = np.max(q) - q[0]

        # 3. Other features
        mean_charge_time_2_6[i] = np.mean(cell['summary']['chargetime'][1:6])
        minimum_IR_2_100[i] = np.min(cell['summary']['IR'][1:100])
        diff_IR_100_2[i] = cell['summary']['IR'][100] - cell['summary']['IR'][1]

        # Classifier features
        c4 = cell['cycles']['4']
        c5 = cell['cycles']['5']
        dQ_5_4 = c5['Qdlin'] - c4['Qdlin']
        minimum_dQ_5_4[i] = np.log(np.abs(np.min(dQ_5_4)))
        variance_dQ_5_4[i] = np.log(np.var(dQ_5_4))
        cycle_550_clf[i] = life >= 550

    features_df = pd.DataFrame({
        "cell_key": np.array(list(batch_dict.keys())),
        "minimum_dQ_100_10": minimum_dQ_100_10,
        "variance_dQ_100_10": variance_dQ_100_10,
        "skewness_dQ_100_10": skewness_dQ_100_10,
        "kurtosis_dQ_100_10": kurtosis_dQ_100_10,
        "slope_lin_fit_2_100": slope_lin_fit_2_100,
        "intercept_lin_fit_2_100": intercept_lin_fit_2_100,
        "discharge_capacity_2": discharge_capacity_2,
        "diff_discharge_capacity_max_2": diff_discharge_capacity_max_2,
        "mean_charge_time_2_6": mean_charge_time_2_6,
        "minimum_IR_2_100": minimum_IR_2_100,
        "diff_IR_100_2": diff_IR_100_2,
        "minimum_dQ_5_4": minimum_dQ_5_4,
        "variance_dQ_5_4": variance_dQ_5_4,
        "cycle_life": cycle_life,
        "cycle_550_clf": cycle_550_clf
    })
    print("Done building features")
    return features_df
if __name__ == "__main__":
    # Script entry point: load every available batch, derive the feature
    # table and persist it as CSV inside the data directory.
    batches = load_batches_to_dict()
    feature_table = build_feature_df(batches)
    csv_path = join(DATA_DIR, "rebuild_features.csv")
    feature_table.to_csv(csv_path, index=False)
    print("Saved features to ", csv_path)