integrated forecasting of load into general optimization pipeline

Midren · Jan 10, 2022 · 4dc5076 · 4dc5076
1 parent b8a30c9
commit 4dc5076
Show file tree

Hide file tree

Showing 4 changed files with 323 additions and 145 deletions.
diff --git a/notebooks/Forecasting.ipynb b/notebooks/Forecasting.ipynb
diff --git a/python_libs/battery_optimizations/date_splitting.py b/python_libs/battery_optimizations/date_splitting.py
@@ -97,10 +97,31 @@ def _get_day_diff(df: pd.DataFrame, day: datetime.date):
     return (datetime.datetime(day.year, day.month, day.day, first_day.hour) - first_day).days
 
 
-def get_day_load(df: pd.DataFrame, day: datetime.date):
+def get_days_load(df: pd.DataFrame, day: datetime.date, n_days=1):
     start_of_day = _get_start_of_days(df)
-    st = start_of_day[_get_day_diff(df, day)]
-    return df.iloc[st:st + 24]
+    day = datetime.datetime.combine(day, datetime.datetime.min.time())
+
+    st_idx = _get_day_diff(df, day)
+    while (df.iloc[start_of_day[st_idx-1]].name.to_pydatetime() - day).days > 0:
+        st_idx -= 1
+    while (day - df.iloc[start_of_day[st_idx+1]].name.to_pydatetime()).days > 0:
+        st_idx += 1
+    st = start_of_day[st_idx]
+
+    return df.iloc[st:st + 24*n_days]
+
+def get_day_idx(df: pd.DataFrame, day: datetime.date):
+    start_of_day = _get_start_of_days(df)
+    day = datetime.datetime.combine(day, datetime.datetime.min.time())
+
+    st_idx = _get_day_diff(df, day)
+    while (df.iloc[start_of_day[st_idx-1]].name.to_pydatetime() - day).days > 0:
+        st_idx -= 1
+    while (day - df.iloc[start_of_day[st_idx+1]].name.to_pydatetime()).days > 0:
+        st_idx += 1
+    st = start_of_day[st_idx]
+
+    return st
 
 
 def get_interval_load(df: pd.DataFrame, st_day: datetime.date, en_day: datetime.date):

diff --git a/python_libs/battery_optimizations/load_forecasting.py b/python_libs/battery_optimizations/load_forecasting.py
@@ -1,56 +1,137 @@
-from pathlib import Path
-import typing as t
-
-import torch
 import numpy as np
 import pandas as pd
-import pytorch_lightning as pl
-from torch.utils.data import Dataset, DataLoader
-
-class TimeseriesDataset(Dataset):
-    '''
-    Custom Dataset subclass.
-    Serves as input to DataLoader to transform X
-      into sequence data using rolling window.
-    DataLoader using this dataset will output batches
-      of `(batch_size, seq_len, n_features)` shape.
-    Suitable as an input to RNNs.
-
-    from: https://www.kaggle.com/tartakovsky/pytorch-lightning-lstm-timeseries-clean-code
-    '''
-    def __init__(self, X: np.ndarray, y: np.ndarray, seq_len: int = 1):
-        self.X = torch.tensor(X).float()
-        self.y = torch.tensor(y).float()
-        self.seq_len = seq_len
-
-    def __len__(self):
-        return self.X.__len__() - (self.seq_len-1)
-
-    def __getitem__(self, index):
-        return (self.X[index:index+self.seq_len], self.y[index+self.seq_len-1])
-
-class SwedenLoadDataModule(pl.LightningDataModule):
-    def __init__(self, filepath: Path = Path('data/sweden_load_2005_2017.csv'), batch_size: int = 32):
-        super().__init__()
-        self.filepath = filepath
-        df = pd.read_csv(filepath)
-        df['cet_cest_timestamp'] = df['cet_cest_timestamp'].apply(lambda x: x.replace(tzinfo=None))
-        df = df.rename({'cet_cest_timestamp': 'time', 'SE_load_actual_tso': 'load'}, axis=1)
-        self.df = df
-
-    def setup(self, stage=None):
-        if stage == 'fit' and self.X_train is not None:
-            return
-        if stage == 'test' and self.X_test is not None:
-            return
-        if stage is None and self.X_train is not None and self.X_test is not None:
-            return
-
-    def train_dataloader(self) -> DataLoader:
-        ...
-
-    def val_dataloader(self) -> t.Union[DataLoader, t.List[DataLoader]]:
-        ...
-
-    def test_dataloader(self) -> t.Union[DataLoader, t.List[DataLoader]]:
-        ...
+
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+
+from statsmodels.tsa import seasonal
+from statsmodels.tsa.arima.model import ARIMA
+
+from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
+
+def _decompose(ts, period, show_figs=False):
+    decomposition = seasonal.seasonal_decompose(ts, period=period)
+
+    trend = decomposition.trend
+    trend.dropna(inplace=True)
+    seas = decomposition.seasonal
+    residual = decomposition.resid
+    residual.dropna(inplace=True);
+
+    if show_figs:
+        # Original
+        plt.subplot(411)
+        plt.plot(ts, label='Original')
+        plt.legend(loc='upper left')
+
+        # Seasonality
+        plt.subplot(412)
+        plt.plot(seas, label='Seasonality')
+        plt.legend(loc='upper left')
+
+        # Trend
+        plt.subplot(413)
+        plt.plot(trend, label='Trend')
+        plt.legend(loc='upper left')
+
+        # Resudials
+        plt.subplot(414)
+        plt.plot(residual, label='Residuals')
+        plt.legend(loc='upper left')
+
+        plt.tight_layout()
+        plt.show()
+    return seas, trend, residual
+
+
+def predict_arima(model_params, ts, train_indeces, val_indeces, diff_order=0):
+    train_val = ts.iloc[[train_indeces[0] - i for i in range(1, diff_order+1)][::-1] + list(train_indeces)].values
+    offsets = []
+    for i in range(diff_order):
+        offsets.append(train_val[0])
+        train_val = np.diff(train_val, 1)
+
+    predicted = np.array([])
+    for val_index in tqdm(val_indeces):
+        model = ARIMA(np.append(train_val, predicted), **model_params)
+        model_fit = model.fit(method_kwargs={"warn_convergence": False})
+        res = model_fit.forecast(1)
+        train_indeces = np.append(train_indeces, val_index)
+        predicted = np.append(predicted, res[0])
+
+    res = np.r_[train_val, predicted]
+    for i in range(1, diff_order+1):
+        res = np.r_[offsets[-i], res].cumsum()
+    return res[diff_order+len(train_val):]
+
+
+def plot_predicted(ts, predicted, train_indeces, val_indeces, ax, title):
+    ax.plot(ts[np.append(train_indeces, val_indeces)].values)
+    ax.plot(np.arange(len(train_indeces), len(train_indeces) + len(val_indeces), 1), predicted)
+    ax.set_ylabel('Load')
+    ax.set_xlabel('Hours')
+    ax.set_title(title)
+
+
+def decompose_load(load_df: pd.DataFrame, show_figs=False):
+    year_seasonal, year_trend, year_res = _decompose(load_df, 24*365, show_figs=show_figs)
+    day_seasonal, day_trend, day_res = _decompose(year_res, 24, show_figs=show_figs)
+
+    # remove part of data for easier additivity
+    year_seasonal = year_seasonal[366*24//2:-366*24//2]
+    year_trend = year_trend[24//2:-24//2]
+    day_seasonal = day_seasonal[24//2:-24//2]
+
+    return year_seasonal, year_trend, year_res, day_seasonal, day_trend, day_res
+
+
+def predict_day_load(load_df: pd.DataFrame, day_idx: int, show_figs=False):
+    year_seasonal, year_trend, year_res, day_seasonal, day_trend, day_res = decompose_load(load_df, show_figs=False)
+
+    day_idx -= 366*24//2-1
+
+    # train size 2 weeks, prediction for 1 day
+    train_indeces = day_idx + np.arange(0, 2*24*7, 1)
+    val_indeces = train_indeces[-1] + np.arange(0, 24, 1)
+
+    # TODO: add prediction by LSTM (see: Forecasting.ipynb)
+    predicted_day_r = predict_arima({'order': (9, 1, 24)}, day_res, train_indeces, val_indeces)
+    predicted_day_t = predict_arima({'order': (3, 1, 2)}, day_trend, train_indeces, val_indeces, diff_order=1)
+    predicted_year_r = predict_arima({'order': (3, 1, 2)}, year_res, train_indeces, val_indeces)
+    predicted_year_t = predict_arima({'order': (3, 1, 2)}, year_trend, train_indeces, val_indeces, diff_order=2)
+
+    total = year_seasonal + year_trend + day_seasonal + day_trend + day_res
+    predicted_t = year_seasonal.iloc[val_indeces] + predicted_year_t + day_seasonal.iloc[val_indeces] + predicted_day_t + predicted_day_r
+
+    if show_figs:
+        fig = plt.figure()
+        ax_1 = fig.add_subplot(2, 1, 1)
+        ax_2 = fig.add_subplot(2, 4, 5)
+        ax_3 = fig.add_subplot(2, 4, 6)
+        ax_4 = fig.add_subplot(2, 4, 7)
+        ax_5 = fig.add_subplot(2, 4, 8)
+
+        def plot_prediction(ts, predicted_ts, train_idx, val_idx, ax, title):
+            rmse = mean_squared_error(ts.iloc[val_idx], predicted_ts, squared=False)
+            mae = mean_absolute_error(ts.iloc[val_idx], predicted_ts)
+            mape = mean_absolute_percentage_error(ts.iloc[val_idx], predicted_ts)
+            plot_predicted(ts, predicted_ts, train_idx, val_idx, ax, f'{title}\nMAPE: {mape:.2f}\nMAE: {mae:.2f}\nRMSE: {rmse:.2f}')
+
+        plot_prediction(total, predicted_t, train_indeces, val_indeces, ax_1, 'Total load')
+        plot_prediction(day_res, predicted_day_r, train_indeces, val_indeces, ax_2, 'Day residuals')
+        plot_prediction(day_trend, predicted_day_t, train_indeces, val_indeces, ax_3, 'Day trend')
+        plot_prediction(year_res, predicted_year_r, train_indeces, val_indeces, ax_4, 'Year residuals')
+        plot_prediction(year_trend, predicted_year_t, train_indeces, val_indeces, ax_5, 'Year trend')
+
+        plt.show()
+    return predicted_t
+
+if __name__ == "__main__":
+    df = pd.read_csv('/Users/roman.milishchuk/bachelor/notebooks/data/sweden_load_2005_2017.csv', parse_dates=['cet_cest_timestamp'])
+
+    df['cet_cest_timestamp'] = df['cet_cest_timestamp'].apply(lambda x: x.replace(tzinfo=None))
+    df = df.rename({'cet_cest_timestamp': 'time', 'SE_load_actual_tso': 'load'}, axis=1)
+    df = df.set_index('time')
+    # df = df.loc[~df.index.duplicated(keep='first')]
+
+    predict_day_load(df, 103784, show_figs=True)
diff --git a/python_libs/battery_optimizations/peak_shaving.py b/python_libs/battery_optimizations/peak_shaving.py
@@ -9,7 +9,8 @@
 import matplotlib.pyplot as plt
 import neptune.new as neptune
 
-from battery_optimizations.date_splitting import get_day_load, get_interval_load
+from battery_optimizations.load_forecasting import predict_day_load
+from battery_optimizations.date_splitting import get_days_load, get_interval_load, get_day_idx
 from battery_optimizations.reference_signal import calculate_ref_load
 
 from mpc_optimization.battery_model_constants import Battery_state_vars, State_vars_aliases_dict, C_rate_per_second
@@ -21,7 +22,8 @@ def get_load_df() -> pd.DataFrame:
     df['cet_cest_timestamp'] = df['cet_cest_timestamp'].apply(lambda x: x.replace(tzinfo=None))
     df = df.rename({'cet_cest_timestamp': 'time', 'SE_load_actual_tso': 'load'}, axis=1)
     df = df.set_index('time')
-    df = df.loc[~df.index.duplicated(keep='first')]
+    # TODO: removal of duplicated indeces breaks prediction
+    # df = df.loc[~df.index.duplicated(keep='first')]
     return df
 
 def show_week_interval(df):
@@ -68,7 +70,7 @@ def get_ref_price(expected_load: pd.Series, step, time):
     run = neptune.init(project='midren/mpc-peak-shaving',
                        api_token='eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIwNTZhNzExNS01MmY2LTRjZjctYjQyMS03Y2QxYTM4ZGQ5YWYifQ==',
                        source_files=[],
-                       description='Change load interpolation to 10 sec'
+                       description='Prediction integration'
                        # mode='debug'
                        )
 
@@ -107,16 +109,28 @@ def get_ref_price(expected_load: pd.Series, step, time):
     control_df.set_index('Time', inplace=True)
 
     df = get_load_df()
-    load_df = get_day_load(df, datetime.date(2017, 11, 6))
+    day = datetime.date(2016, 11, 6)
+
+    st_idx = get_day_idx(df, day)
+    st_idx -= 2*24*7
+    load_df = predict_day_load(df, st_idx)
+    true_load_df = get_days_load(df, day, n_days=1)
+
+    # load_df = get_days_load(df, datetime.date(2017, 11, 6), n_days=2*7*24+24)
     expected_load = calculate_ref_load(load_df.values.flatten(), battery_capacity=capacity_ah)
     for i in load_df.values:
+        run['initial/predicted_load'].log(i)
+    for i in true_load_df.values:
         run['initial/load'].log(i[0])
     for i in expected_load:
         run['initial/optimal_load'].log(i)
 
     expected_load = pd.DataFrame({'Time': load_df.index, 'load': expected_load}).set_index('Time').asfreq(freq='10S').interpolate('linear')
     load_df = load_df.asfreq(freq='10S').interpolate('linear')
+    true_load_df = true_load_df.asfreq(freq='10S').interpolate('linear')
     for i in load_df.values:
+        run['initial/interpolated_predicted_load'].log(i)
+    for i in true_load_df.values:
         run['initial/interpolated_load'].log(i[0])
     for i in expected_load.values:
         run['initial/interpolated_optimal_load'].log(i[0])
@@ -127,7 +141,7 @@ def cost_func(step_num: int, states: pd.DataFrame, input: pd.DataFrame, output:
         controlled_idx = states['time'].sub(min(time+3*step, final_time-10)).abs().idxmin() + 1
 
         controlled_timepoints = states['time'][:controlled_idx]
-        get_energy = partial(get_energy_mwh_required, load_df)
+        get_energy = partial(get_energy_mwh_required, true_load_df)
         controlled_energy = np.array(list(map(get_energy, controlled_timepoints)))
         controlled_discharged_energy = input[:controlled_idx]['I_req'].values*capacity_wh/3600
         # controlled_discharged_energy = input[:controlled_idx]['I_req'].values*capacity_wh*step/3600
@@ -153,7 +167,7 @@ def cost_func(step_num: int, states: pd.DataFrame, input: pd.DataFrame, output:
     def visualize_output(step_num: int, states: pd.DataFrame, run: neptune.Run):
         timepoints = states['time']
 
-        required = np.array(list(map(lambda time: get_power_load(load_df, time)/1000, timepoints.values)))
+        required = np.array(list(map(lambda time: get_power_load(true_load_df, time)/1000, timepoints.values)))
         discharged = states['I_req'].values*capacity_ah*nominal_v
 
         for _, state in itertools.islice(states.iterrows(), len(states) - 1):