Skip to content

Commit

Permalink
integrated forecasting of load into general optimization pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
Midren committed Jan 10, 2022
1 parent b8a30c9 commit 4dc5076
Show file tree
Hide file tree
Showing 4 changed files with 323 additions and 145 deletions.
226 changes: 144 additions & 82 deletions notebooks/Forecasting.ipynb

Large diffs are not rendered by default.

27 changes: 24 additions & 3 deletions python_libs/battery_optimizations/date_splitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,31 @@ def _get_day_diff(df: pd.DataFrame, day: datetime.date):
return (datetime.datetime(day.year, day.month, day.day, first_day.hour) - first_day).days


def get_day_load(df: pd.DataFrame, day: datetime.date):
def get_days_load(df: pd.DataFrame, day: datetime.date, n_days=1):
start_of_day = _get_start_of_days(df)
st = start_of_day[_get_day_diff(df, day)]
return df.iloc[st:st + 24]
day = datetime.datetime.combine(day, datetime.datetime.min.time())

st_idx = _get_day_diff(df, day)
while (df.iloc[start_of_day[st_idx-1]].name.to_pydatetime() - day).days > 0:
st_idx -= 1
while (day - df.iloc[start_of_day[st_idx+1]].name.to_pydatetime()).days > 0:
st_idx += 1
st = start_of_day[st_idx]

return df.iloc[st:st + 24*n_days]

def get_day_idx(df: pd.DataFrame, day: datetime.date):
start_of_day = _get_start_of_days(df)
day = datetime.datetime.combine(day, datetime.datetime.min.time())

st_idx = _get_day_diff(df, day)
while (df.iloc[start_of_day[st_idx-1]].name.to_pydatetime() - day).days > 0:
st_idx -= 1
while (day - df.iloc[start_of_day[st_idx+1]].name.to_pydatetime()).days > 0:
st_idx += 1
st = start_of_day[st_idx]

return st


def get_interval_load(df: pd.DataFrame, st_day: datetime.date, en_day: datetime.date):
Expand Down
189 changes: 135 additions & 54 deletions python_libs/battery_optimizations/load_forecasting.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,137 @@
from pathlib import Path
import typing as t

import torch
import numpy as np
import pandas as pd
import pytorch_lightning as pl
from torch.utils.data import Dataset, DataLoader

class TimeseriesDataset(Dataset):
'''
Custom Dataset subclass.
Serves as input to DataLoader to transform X
into sequence data using rolling window.
DataLoader using this dataset will output batches
of `(batch_size, seq_len, n_features)` shape.
Suitable as an input to RNNs.
from: https://www.kaggle.com/tartakovsky/pytorch-lightning-lstm-timeseries-clean-code
'''
def __init__(self, X: np.ndarray, y: np.ndarray, seq_len: int = 1):
self.X = torch.tensor(X).float()
self.y = torch.tensor(y).float()
self.seq_len = seq_len

def __len__(self):
return self.X.__len__() - (self.seq_len-1)

def __getitem__(self, index):
return (self.X[index:index+self.seq_len], self.y[index+self.seq_len-1])

class SwedenLoadDataModule(pl.LightningDataModule):
def __init__(self, filepath: Path = Path('data/sweden_load_2005_2017.csv'), batch_size: int = 32):
super().__init__()
self.filepath = filepath
df = pd.read_csv(filepath)
df['cet_cest_timestamp'] = df['cet_cest_timestamp'].apply(lambda x: x.replace(tzinfo=None))
df = df.rename({'cet_cest_timestamp': 'time', 'SE_load_actual_tso': 'load'}, axis=1)
self.df = df

def setup(self, stage=None):
if stage == 'fit' and self.X_train is not None:
return
if stage == 'test' and self.X_test is not None:
return
if stage is None and self.X_train is not None and self.X_test is not None:
return

def train_dataloader(self) -> DataLoader:
...

def val_dataloader(self) -> t.Union[DataLoader, t.List[DataLoader]]:
...

def test_dataloader(self) -> t.Union[DataLoader, t.List[DataLoader]]:
...

import matplotlib.pyplot as plt
from tqdm import tqdm

from statsmodels.tsa import seasonal
from statsmodels.tsa.arima.model import ARIMA

from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error

def _decompose(ts, period, show_figs=False):
decomposition = seasonal.seasonal_decompose(ts, period=period)

trend = decomposition.trend
trend.dropna(inplace=True)
seas = decomposition.seasonal
residual = decomposition.resid
residual.dropna(inplace=True);

if show_figs:
# Original
plt.subplot(411)
plt.plot(ts, label='Original')
plt.legend(loc='upper left')

# Seasonality
plt.subplot(412)
plt.plot(seas, label='Seasonality')
plt.legend(loc='upper left')

# Trend
plt.subplot(413)
plt.plot(trend, label='Trend')
plt.legend(loc='upper left')

# Resudials
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='upper left')

plt.tight_layout()
plt.show()
return seas, trend, residual


def predict_arima(model_params, ts, train_indeces, val_indeces, diff_order=0):
train_val = ts.iloc[[train_indeces[0] - i for i in range(1, diff_order+1)][::-1] + list(train_indeces)].values
offsets = []
for i in range(diff_order):
offsets.append(train_val[0])
train_val = np.diff(train_val, 1)

predicted = np.array([])
for val_index in tqdm(val_indeces):
model = ARIMA(np.append(train_val, predicted), **model_params)
model_fit = model.fit(method_kwargs={"warn_convergence": False})
res = model_fit.forecast(1)
train_indeces = np.append(train_indeces, val_index)
predicted = np.append(predicted, res[0])

res = np.r_[train_val, predicted]
for i in range(1, diff_order+1):
res = np.r_[offsets[-i], res].cumsum()
return res[diff_order+len(train_val):]


def plot_predicted(ts, predicted, train_indeces, val_indeces, ax, title):
ax.plot(ts[np.append(train_indeces, val_indeces)].values)
ax.plot(np.arange(len(train_indeces), len(train_indeces) + len(val_indeces), 1), predicted)
ax.set_ylabel('Load')
ax.set_xlabel('Hours')
ax.set_title(title)


def decompose_load(load_df: pd.DataFrame, show_figs=False):
year_seasonal, year_trend, year_res = _decompose(load_df, 24*365, show_figs=show_figs)
day_seasonal, day_trend, day_res = _decompose(year_res, 24, show_figs=show_figs)

# remove part of data for easier additivity
year_seasonal = year_seasonal[366*24//2:-366*24//2]
year_trend = year_trend[24//2:-24//2]
day_seasonal = day_seasonal[24//2:-24//2]

return year_seasonal, year_trend, year_res, day_seasonal, day_trend, day_res


def predict_day_load(load_df: pd.DataFrame, day_idx: int, show_figs=False):
year_seasonal, year_trend, year_res, day_seasonal, day_trend, day_res = decompose_load(load_df, show_figs=False)

day_idx -= 366*24//2-1

# train size 2 weeks, prediction for 1 day
train_indeces = day_idx + np.arange(0, 2*24*7, 1)
val_indeces = train_indeces[-1] + np.arange(0, 24, 1)

# TODO: add prediction by LSTM (see: Forecasting.ipynb)
predicted_day_r = predict_arima({'order': (9, 1, 24)}, day_res, train_indeces, val_indeces)
predicted_day_t = predict_arima({'order': (3, 1, 2)}, day_trend, train_indeces, val_indeces, diff_order=1)
predicted_year_r = predict_arima({'order': (3, 1, 2)}, year_res, train_indeces, val_indeces)
predicted_year_t = predict_arima({'order': (3, 1, 2)}, year_trend, train_indeces, val_indeces, diff_order=2)

total = year_seasonal + year_trend + day_seasonal + day_trend + day_res
predicted_t = year_seasonal.iloc[val_indeces] + predicted_year_t + day_seasonal.iloc[val_indeces] + predicted_day_t + predicted_day_r

if show_figs:
fig = plt.figure()
ax_1 = fig.add_subplot(2, 1, 1)
ax_2 = fig.add_subplot(2, 4, 5)
ax_3 = fig.add_subplot(2, 4, 6)
ax_4 = fig.add_subplot(2, 4, 7)
ax_5 = fig.add_subplot(2, 4, 8)

def plot_prediction(ts, predicted_ts, train_idx, val_idx, ax, title):
rmse = mean_squared_error(ts.iloc[val_idx], predicted_ts, squared=False)
mae = mean_absolute_error(ts.iloc[val_idx], predicted_ts)
mape = mean_absolute_percentage_error(ts.iloc[val_idx], predicted_ts)
plot_predicted(ts, predicted_ts, train_idx, val_idx, ax, f'{title}\nMAPE: {mape:.2f}\nMAE: {mae:.2f}\nRMSE: {rmse:.2f}')

plot_prediction(total, predicted_t, train_indeces, val_indeces, ax_1, 'Total load')
plot_prediction(day_res, predicted_day_r, train_indeces, val_indeces, ax_2, 'Day residuals')
plot_prediction(day_trend, predicted_day_t, train_indeces, val_indeces, ax_3, 'Day trend')
plot_prediction(year_res, predicted_year_r, train_indeces, val_indeces, ax_4, 'Year residuals')
plot_prediction(year_trend, predicted_year_t, train_indeces, val_indeces, ax_5, 'Year trend')

plt.show()
return predicted_t

if __name__ == "__main__":
df = pd.read_csv('/Users/roman.milishchuk/bachelor/notebooks/data/sweden_load_2005_2017.csv', parse_dates=['cet_cest_timestamp'])

df['cet_cest_timestamp'] = df['cet_cest_timestamp'].apply(lambda x: x.replace(tzinfo=None))
df = df.rename({'cet_cest_timestamp': 'time', 'SE_load_actual_tso': 'load'}, axis=1)
df = df.set_index('time')
# df = df.loc[~df.index.duplicated(keep='first')]

predict_day_load(df, 103784, show_figs=True)
26 changes: 20 additions & 6 deletions python_libs/battery_optimizations/peak_shaving.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
import matplotlib.pyplot as plt
import neptune.new as neptune

from battery_optimizations.date_splitting import get_day_load, get_interval_load
from battery_optimizations.load_forecasting import predict_day_load
from battery_optimizations.date_splitting import get_days_load, get_interval_load, get_day_idx
from battery_optimizations.reference_signal import calculate_ref_load

from mpc_optimization.battery_model_constants import Battery_state_vars, State_vars_aliases_dict, C_rate_per_second
Expand All @@ -21,7 +22,8 @@ def get_load_df() -> pd.DataFrame:
df['cet_cest_timestamp'] = df['cet_cest_timestamp'].apply(lambda x: x.replace(tzinfo=None))
df = df.rename({'cet_cest_timestamp': 'time', 'SE_load_actual_tso': 'load'}, axis=1)
df = df.set_index('time')
df = df.loc[~df.index.duplicated(keep='first')]
# TODO: removal of duplicated indeces breaks prediction
# df = df.loc[~df.index.duplicated(keep='first')]
return df

def show_week_interval(df):
Expand Down Expand Up @@ -68,7 +70,7 @@ def get_ref_price(expected_load: pd.Series, step, time):
run = neptune.init(project='midren/mpc-peak-shaving',
api_token='eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIwNTZhNzExNS01MmY2LTRjZjctYjQyMS03Y2QxYTM4ZGQ5YWYifQ==',
source_files=[],
description='Change load interpolation to 10 sec'
description='Prediction integration'
# mode='debug'
)

Expand Down Expand Up @@ -107,16 +109,28 @@ def get_ref_price(expected_load: pd.Series, step, time):
control_df.set_index('Time', inplace=True)

df = get_load_df()
load_df = get_day_load(df, datetime.date(2017, 11, 6))
day = datetime.date(2016, 11, 6)

st_idx = get_day_idx(df, day)
st_idx -= 2*24*7
load_df = predict_day_load(df, st_idx)
true_load_df = get_days_load(df, day, n_days=1)

# load_df = get_days_load(df, datetime.date(2017, 11, 6), n_days=2*7*24+24)
expected_load = calculate_ref_load(load_df.values.flatten(), battery_capacity=capacity_ah)
for i in load_df.values:
run['initial/predicted_load'].log(i)
for i in true_load_df.values:
run['initial/load'].log(i[0])
for i in expected_load:
run['initial/optimal_load'].log(i)

expected_load = pd.DataFrame({'Time': load_df.index, 'load': expected_load}).set_index('Time').asfreq(freq='10S').interpolate('linear')
load_df = load_df.asfreq(freq='10S').interpolate('linear')
true_load_df = true_load_df.asfreq(freq='10S').interpolate('linear')
for i in load_df.values:
run['initial/interpolated_predicted_load'].log(i)
for i in true_load_df.values:
run['initial/interpolated_load'].log(i[0])
for i in expected_load.values:
run['initial/interpolated_optimal_load'].log(i[0])
Expand All @@ -127,7 +141,7 @@ def cost_func(step_num: int, states: pd.DataFrame, input: pd.DataFrame, output:
controlled_idx = states['time'].sub(min(time+3*step, final_time-10)).abs().idxmin() + 1

controlled_timepoints = states['time'][:controlled_idx]
get_energy = partial(get_energy_mwh_required, load_df)
get_energy = partial(get_energy_mwh_required, true_load_df)
controlled_energy = np.array(list(map(get_energy, controlled_timepoints)))
controlled_discharged_energy = input[:controlled_idx]['I_req'].values*capacity_wh/3600
# controlled_discharged_energy = input[:controlled_idx]['I_req'].values*capacity_wh*step/3600
Expand All @@ -153,7 +167,7 @@ def cost_func(step_num: int, states: pd.DataFrame, input: pd.DataFrame, output:
def visualize_output(step_num: int, states: pd.DataFrame, run: neptune.Run):
timepoints = states['time']

required = np.array(list(map(lambda time: get_power_load(load_df, time)/1000, timepoints.values)))
required = np.array(list(map(lambda time: get_power_load(true_load_df, time)/1000, timepoints.values)))
discharged = states['I_req'].values*capacity_ah*nominal_v

for _, state in itertools.islice(states.iterrows(), len(states) - 1):
Expand Down

0 comments on commit 4dc5076

Please sign in to comment.