diff --git a/pdr_backend/aimodel/aimodel_data_factory.py b/pdr_backend/aimodel/aimodel_data_factory.py
index 32d90c12a..561393700 100644
--- a/pdr_backend/aimodel/aimodel_data_factory.py
+++ b/pdr_backend/aimodel/aimodel_data_factory.py
@@ -1,3 +1,7 @@
+#
+# Copyright 2024 Ocean Protocol Foundation
+# SPDX-License-Identifier: Apache-2.0
+#
 import logging
 import sys
 from typing import List, Optional, Tuple
@@ -112,8 +116,6 @@ def create_xy(
     x_dim_len = len(train_feeds_list) * ss.autoregressive_n
     diff = 0 if ss.transform == "None" else 1
 
-    features: List[pd.Series] = []
-
     # main work
     xcol_list = []  # [col_i] : name_str
     x_list = []  # [col_i] : Series. Build this up. Not df here (slow)
@@ -150,16 +152,6 @@ def create_xy(
                 x_col = hist_col + f":(z(t-{ds1})-z(t-{ds11}))/z(t-{ds11})"
                 xcol_list += [x_col]
 
-            for i, feature in enumerate(features):
-                assert type(feature) == pd.Series  # type check for mypy
-                feature_np = list(feature.values)
-                features_shifted = pd.Series(
-                    _slice(feature_np, -shift - N_train - 1, -shift)
-                )
-                x_list += [features_shifted]
-                xrecent_list += [pd.Series(_slice(feature_np, -shift, -shift + 1))]
-                xcol_list.append(f"{feature.name}_t-{ds1}-{i}")
-
     # convert x lists to dfs, all at once. Faster than building up df.
     assert len(x_list) == len(xrecent_list) == len(xcol_list)
     x_df = pd.concat(x_list, keys=xcol_list, axis=1)
@@ -189,8 +181,7 @@ def create_xy(
 
     # postconditions
     assert X.shape[0] == yraw.shape[0] == ytran.shape[0]
    assert X.shape[0] <= (N_train + 1)
-    feature_dims = len(features) * len(train_feeds_list) * ss.autoregressive_n
-    assert X.shape[1] == x_dim_len + feature_dims
+    assert X.shape[1] == x_dim_len
     assert isinstance(x_df, pd.DataFrame)
     assert "timestamp" not in x_df.columns
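
A minimal sketch of the column-count invariant this diff leaves in place, assuming only names that appear above (train_feeds_list, ss.autoregressive_n); the feed strings and lag count below are made-up illustrations, not values from the repo:

    # Toy illustration of the simplified postcondition in create_xy():
    # with the per-feature columns removed, X has exactly one column
    # per (training feed, autoregressive lag) pair.
    train_feeds_list = ["binanceus BTC/USDT close", "binanceus ETH/USDT close"]  # hypothetical feeds
    autoregressive_n = 3  # hypothetical value of ss.autoregressive_n

    x_dim_len = len(train_feeds_list) * autoregressive_n
    # after this change, create_xy() asserts X.shape[1] == x_dim_len (2 feeds * 3 lags = 6)
    assert x_dim_len == 6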