From b6f3d8001bd23e6528501eba672f78a9abfd1294 Mon Sep 17 00:00:00 2001
From: Zezhi Shao <864453277@qq.com>
Date: Sat, 31 Aug 2024 15:12:52 +0000
Subject: [PATCH] update basicts

---
 baselines/AGCRN/run.sh                        |   7 -
 baselines/Autoformer/PEMS04_LTSF.py           | 156 ++++++++++++++++
 baselines/Autoformer/PEMS08_LTSF.py           | 156 ++++++++++++++++
 baselines/Autoformer/run.sh                   |  10 -
 baselines/Crossformer/PEMS04_LTSF.py          | 146 +++++++++++++++
 baselines/Crossformer/PEMS08_LTSF.py          | 146 +++++++++++++++
 baselines/Crossformer/run.sh                  |  10 -
 baselines/D2STGNN/run.sh                      |   7 -
 baselines/DCRNN/run.sh                        |   7 -
 baselines/DGCRN/run.sh                        |   7 -
 .../DLinear/{PEMS04.py => PEMS04_LTSF.py}     |  13 +-
 .../DLinear/{METR-LA.py => PEMS08_LTSF.py}    |  17 +-
 baselines/DLinear/run.sh                      |  10 -
 baselines/DSFormer/PEMS04_LTSF.py             | 145 +++++++++++++++
 baselines/DSFormer/PEMS08_LTSF.py             | 145 +++++++++++++++
 baselines/DSFormer/run.sh                     |  12 --
 baselines/DeepAR/ETTh1.py                     | 129 +++++++++++++
 baselines/DeepAR/ETTm1.py                     | 129 +++++++++++++
 baselines/DeepAR/Electricity.py               | 129 +++++++++++++
 baselines/DeepAR/ExchangeRate.py              | 130 +++++++++++++
 baselines/DeepAR/PEMS04.py                    | 130 +++++++++++++
 baselines/DeepAR/PEMS04_LTSF.py               | 132 +++++++++++++
 baselines/DeepAR/PEMS08_LTSF.py               | 132 +++++++++++++
 baselines/DeepAR/Weather.py                   | 129 +++++++++++++
 baselines/DeepAR/arch/__init__.py             |   1 +
 baselines/DeepAR/arch/deepar.py               | 100 ++++++++++
 baselines/DeepAR/arch/distributions.py        |  22 +++
 baselines/DeepAR/loss/__init__.py             |   1 +
 baselines/DeepAR/loss/gaussian.py             |  29 +++
 baselines/DeepAR/runner/__init__.py           |   1 +
 baselines/DeepAR/runner/deepar_runner.py      | 150 +++++++++++++++
 baselines/FEDformer/PEMS04_LTSF.py            | 157 ++++++++++++++++
 baselines/FEDformer/PEMS08_LTSF.py            | 157 ++++++++++++++++
 baselines/FEDformer/run.sh                    |  10 -
 baselines/GTS/run.sh                          |   7 -
 baselines/GWNet/run.sh                        |   7 -
 baselines/Informer/PEMS04_LTSF.py             | 161 ++++++++++++++++
 baselines/Informer/PEMS08_LTSF.py             | 161 ++++++++++++++++
 baselines/Informer/run.sh                     |  10 -
 baselines/MTGNN/run.sh                        |   7 -
 baselines/NBeats/PEMS04_LTSF.py               | 143 ++++++++++++++
 baselines/NBeats/PEMS08_LTSF.py               | 143 ++++++++++++++
 baselines/NBeats/run.sh                       |  15 --
 baselines/NLinear/ETTh1.py                    |   1 -
 baselines/NLinear/ETTh2.py                    |   1 -
 baselines/NLinear/ETTm1.py                    |   1 -
 baselines/NLinear/ETTm2.py                    |   1 -
 baselines/NLinear/Electricity.py              |   1 -
 baselines/NLinear/ExchangeRate.py             |   1 -
 baselines/NLinear/PEMS04_LTSF.py              | 138 ++++++++++++++
 baselines/NLinear/PEMS08_LTSF.py              | 138 ++++++++++++++
 baselines/NLinear/Weather.py                  |   1 -
 baselines/NLinear/run.sh                      |  10 -
 baselines/PatchTST/PEMS04_LTSF.py             | 155 ++++++++++++++++
 baselines/PatchTST/PEMS08_LTSF.py             | 155 ++++++++++++++++
 baselines/PatchTST/run.sh                     |  10 -
 baselines/Pyraformer/PEMS04_LTSF.py           | 158 ++++++++++++++++
 baselines/Pyraformer/PEMS08_LTSF.py           | 158 ++++++++++++++++
 baselines/Pyraformer/run.sh                   |  10 -
 baselines/STAEformer/run.sh                   |   7 -
 baselines/STGCN/run.sh                        |   7 -
 baselines/STGODE/METR-LA copy.py              | 118 ++++++++++++
 baselines/STGODE/METR-LA.py                   | 143 ++++++++++++++
 baselines/{DLinear => STGODE}/PEMS-BAY.py     |  39 ++--
 baselines/STGODE/PEMS04.py                    | 143 ++++++++++++++
 baselines/{DLinear => STGODE}/PEMS08.py       |  39 ++--
 baselines/STGODE/arch/__init__.py             |   1 +
 baselines/STGODE/arch/odegcn.py               |  73 ++++++++
 baselines/STGODE/arch/stgode.py               | 174 ++++++++++++++++++
 baselines/STGODE/generate_matrices.py         | 116 ++++++++++++
 baselines/STID/run.sh                         |   8 -
 baselines/STNorm/run.sh                       |   7 -
 baselines/STWave/run.sh                       |   7 -
 baselines/StemGNN/run.sh                      |   7 -
 baselines/TimesNet/run.sh                     |  10 -
 baselines/Triformer/PEMS04_LTSF.py            | 133 +++++++++++++
 baselines/Triformer/PEMS08_LTSF.py            | 133 +++++++++++++
 baselines/Triformer/run.sh                    |  10 -
 baselines/WaveNet/METR-LA.py                  | 143 ++++++++++++++
 baselines/WaveNet/arch.py                     | 147 +++++++++++++++
 basicts/metrics/__init__.py                   |   9 +
 basicts/runners/base_runner.py                |   2 +-
 .../generate_training_data.py                 |  13 +-
 .../CA/generate_training_data.py              |  13 +-
 .../ETTh1/generate_training_data.py           |  15 +-
 .../ETTh2/generate_training_data.py           |  13 +-
 .../ETTm1/generate_training_data.py           |  13 +-
 .../ETTm2/generate_training_data.py           |  13 +-
 .../Electricity/generate_training_data.py     |  13 +-
 .../ExchangeRate/generate_training_data.py    |  13 +-
 .../GBA/generate_training_data.py             |  13 +-
 .../GLA/generate_training_data.py             |  13 +-
 .../Gaussian/generate_training_data.py        |  12 +-
 .../Illness/generate_training_data.py         |  13 +-
 .../METR-LA/generate_training_data.py         |  13 +-
 .../PEMS-BAY/generate_training_data.py        |  13 +-
 .../PEMS03/generate_training_data.py          |  13 +-
 .../PEMS04/generate_training_data.py          |  13 +-
 .../PEMS07/generate_training_data.py          |  13 +-
 .../PEMS08/generate_training_data.py          |  13 +-
 .../Pulse/generate_training_data.py           |  13 +-
 .../SD/generate_training_data.py              |  13 +-
 .../Traffic/generate_training_data.py         |  13 +-
 .../Weather/generate_training_data.py         |  13 +-
 104 files changed, 5614 insertions(+), 404 deletions(-)
 delete mode 100644 baselines/AGCRN/run.sh
 create mode 100644 baselines/Autoformer/PEMS04_LTSF.py
 create mode 100644 baselines/Autoformer/PEMS08_LTSF.py
 delete mode 100644 baselines/Autoformer/run.sh
 create mode 100644 baselines/Crossformer/PEMS04_LTSF.py
 create mode 100644 baselines/Crossformer/PEMS08_LTSF.py
 delete mode 100644 baselines/Crossformer/run.sh
 delete mode 100644 baselines/D2STGNN/run.sh
 delete mode 100644 baselines/DCRNN/run.sh
 delete mode 100644 baselines/DGCRN/run.sh
 rename baselines/DLinear/{PEMS04.py => PEMS04_LTSF.py} (91%)
 rename baselines/DLinear/{METR-LA.py => PEMS08_LTSF.py} (89%)
 delete mode 100644 baselines/DLinear/run.sh
 create mode 100644 baselines/DSFormer/PEMS04_LTSF.py
 create mode 100644 baselines/DSFormer/PEMS08_LTSF.py
 delete mode 100644 baselines/DSFormer/run.sh
 create mode 100644 baselines/DeepAR/ETTh1.py
 create mode 100644 baselines/DeepAR/ETTm1.py
 create mode 100644 baselines/DeepAR/Electricity.py
 create mode 100644 baselines/DeepAR/ExchangeRate.py
 create mode 100644 baselines/DeepAR/PEMS04.py
 create mode 100644 baselines/DeepAR/PEMS04_LTSF.py
 create mode 100644 baselines/DeepAR/PEMS08_LTSF.py
 create mode 100644 baselines/DeepAR/Weather.py
 create mode 100644 baselines/DeepAR/arch/__init__.py
 create mode 100644 baselines/DeepAR/arch/deepar.py
 create mode 100644 baselines/DeepAR/arch/distributions.py
 create mode 100644 baselines/DeepAR/loss/__init__.py
 create mode 100644 baselines/DeepAR/loss/gaussian.py
 create mode 100644 baselines/DeepAR/runner/__init__.py
 create mode 100644 baselines/DeepAR/runner/deepar_runner.py
 create mode 100644 baselines/FEDformer/PEMS04_LTSF.py
 create mode 100644 baselines/FEDformer/PEMS08_LTSF.py
 delete mode 100644 baselines/FEDformer/run.sh
 delete mode 100644 baselines/GTS/run.sh
 delete mode 100644 baselines/GWNet/run.sh
 create mode 100644 baselines/Informer/PEMS04_LTSF.py
 create mode 100644 baselines/Informer/PEMS08_LTSF.py
 delete mode 100644 baselines/Informer/run.sh
 delete mode 100644 baselines/MTGNN/run.sh
 create mode 100644 baselines/NBeats/PEMS04_LTSF.py
 create mode 100644 baselines/NBeats/PEMS08_LTSF.py
 delete mode 100644 baselines/NBeats/run.sh
 create mode 100644 baselines/NLinear/PEMS04_LTSF.py
 create mode 100644 baselines/NLinear/PEMS08_LTSF.py
 delete mode 100644 baselines/NLinear/run.sh
 create mode 100644 baselines/PatchTST/PEMS04_LTSF.py
 create mode 100644 baselines/PatchTST/PEMS08_LTSF.py
 delete mode 100644 baselines/PatchTST/run.sh
 create mode 100644 baselines/Pyraformer/PEMS04_LTSF.py
 create mode 100644 baselines/Pyraformer/PEMS08_LTSF.py
 delete mode 100644 baselines/Pyraformer/run.sh
 delete mode 100644 baselines/STAEformer/run.sh
 delete mode 100644 baselines/STGCN/run.sh
 create mode 100644 baselines/STGODE/METR-LA copy.py
 create mode 100644 baselines/STGODE/METR-LA.py
 rename baselines/{DLinear => STGODE}/PEMS-BAY.py (84%)
 create mode 100644 baselines/STGODE/PEMS04.py
 rename baselines/{DLinear => STGODE}/PEMS08.py (84%)
 create mode 100644 baselines/STGODE/arch/__init__.py
 create mode 100644 baselines/STGODE/arch/odegcn.py
 create mode 100644 baselines/STGODE/arch/stgode.py
 create mode 100644 baselines/STGODE/generate_matrices.py
 delete mode 100644 baselines/STID/run.sh
 delete mode 100644 baselines/STNorm/run.sh
 delete mode 100644 baselines/STWave/run.sh
 delete mode 100644 baselines/StemGNN/run.sh
 delete mode 100644 baselines/TimesNet/run.sh
 create mode 100644 baselines/Triformer/PEMS04_LTSF.py
 create mode 100644 baselines/Triformer/PEMS08_LTSF.py
 delete mode 100644 baselines/Triformer/run.sh
 create mode 100644 baselines/WaveNet/METR-LA.py
 create mode 100644 baselines/WaveNet/arch.py

diff --git a/baselines/AGCRN/run.sh b/baselines/AGCRN/run.sh
deleted file mode 100644
index 2927c384..00000000
--- a/baselines/AGCRN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/AGCRN/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/AGCRN/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/AGCRN/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/AGCRN/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/AGCRN/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/AGCRN/PEMS08.py --gpus '0'
diff --git a/baselines/Autoformer/PEMS04_LTSF.py b/baselines/Autoformer/PEMS04_LTSF.py
new file mode 100644
index 00000000..e4810408
--- /dev/null
+++ b/baselines/Autoformer/PEMS04_LTSF.py
@@ -0,0 +1,156 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Autoformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 720 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Autoformer
+NUM_NODES = 307
+MODEL_PARAM = {
+    "seq_len": INPUT_LEN,
+    "label_len": INPUT_LEN/2,       # start token length used in decoder
+    "pred_len": OUTPUT_LEN,         # prediction sequence length
+    "moving_avg": 65,                           # window size of moving average. This is a CRUCIAL hyper-parameter.
+    "output_attention": False,
+    "enc_in": NUM_NODES,                        # num nodes
+    "dec_in": NUM_NODES,
+    "c_out": NUM_NODES,
+    "d_model": 512,
+    "embed": "timeF",                           # [timeF, fixed, learned]
+    "dropout": 0.05,
+    "factor": 6,                                # attn factor
+    "n_heads": 8,
+    "d_ff": 2048,
+    "activation": "gelu",
+    "e_layers": 2,                              # num of encoder layers
+    "d_layers": 1,                              # num of decoder layers
+    "num_time_features": 2,                     # number of used time features
+    "time_of_day_size": 288,
+    "day_of_week_size": 7,
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.0005,
+    "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 25, 50],
+    "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/Autoformer/PEMS08_LTSF.py b/baselines/Autoformer/PEMS08_LTSF.py
new file mode 100644
index 00000000..e98a4bbc
--- /dev/null
+++ b/baselines/Autoformer/PEMS08_LTSF.py
@@ -0,0 +1,156 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Autoformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Autoformer
+NUM_NODES = 170
+MODEL_PARAM = {
+    "seq_len": INPUT_LEN,
+    "label_len": INPUT_LEN/2,       # start token length used in decoder
+    "pred_len": OUTPUT_LEN,         # prediction sequence length
+    "moving_avg": 65,                           # window size of moving average. This is a CRUCIAL hyper-parameter.
+    "output_attention": False,
+    "enc_in": NUM_NODES,                        # num nodes
+    "dec_in": NUM_NODES,
+    "c_out": NUM_NODES,
+    "d_model": 512,
+    "embed": "timeF",                           # [timeF, fixed, learned]
+    "dropout": 0.05,
+    "factor": 6,                                # attn factor
+    "n_heads": 8,
+    "d_ff": 2048,
+    "activation": "gelu",
+    "e_layers": 2,                              # num of encoder layers
+    "d_layers": 1,                              # num of decoder layers
+    "num_time_features": 2,                     # number of used time features
+    "time_of_day_size": 288,
+    "day_of_week_size": 7,
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.0005,
+    "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 25, 50],
+    "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/Autoformer/run.sh b/baselines/Autoformer/run.sh
deleted file mode 100644
index e1aa2578..00000000
--- a/baselines/Autoformer/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-# #!/bin/bash
-python experiments/train.py -c baselines/Autoformer/ETTh1.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/ETTh2.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/ETTm1.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/ETTm2.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/Electricity.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/ExchangeRate.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/Weather.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/PEMS04.py --gpus '1'
-python experiments/train.py -c baselines/Autoformer/PEMS08.py --gpus '1'
diff --git a/baselines/Crossformer/PEMS04_LTSF.py b/baselines/Crossformer/PEMS04_LTSF.py
new file mode 100644
index 00000000..5d804b71
--- /dev/null
+++ b/baselines/Crossformer/PEMS04_LTSF.py
@@ -0,0 +1,146 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Crossformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 192 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Crossformer
+NUM_NODES = 307
+MODEL_PARAM = {
+    "data_dim": NUM_NODES,
+    "in_len": INPUT_LEN,
+    "out_len": OUTPUT_LEN,
+    "seg_len": 24,
+    "win_size": 2,
+    # default parameters
+    "factor": 10,
+    "d_model": 256,
+    "d_ff": 512,
+    "n_heads": 4,
+    "e_layers": 3,
+    "dropout": 0.2,
+    "baseline": False
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.0002,
+    "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 5],
+    "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/Crossformer/PEMS08_LTSF.py b/baselines/Crossformer/PEMS08_LTSF.py
new file mode 100644
index 00000000..d57608a3
--- /dev/null
+++ b/baselines/Crossformer/PEMS08_LTSF.py
@@ -0,0 +1,146 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Crossformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Crossformer
+NUM_NODES = 170
+MODEL_PARAM = {
+    "data_dim": NUM_NODES,
+    "in_len": INPUT_LEN,
+    "out_len": OUTPUT_LEN,
+    "seg_len": 24,
+    "win_size": 2,
+    # default parameters
+    "factor": 10,
+    "d_model": 256,
+    "d_ff": 512,
+    "n_heads": 4,
+    "e_layers": 3,
+    "dropout": 0.2,
+    "baseline": False
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.0002,
+    "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 5],
+    "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/Crossformer/run.sh b/baselines/Crossformer/run.sh
deleted file mode 100644
index 4bcfc8fe..00000000
--- a/baselines/Crossformer/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/Crossformer/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/Weather.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/Crossformer/PEMS08.py --gpus '0'
diff --git a/baselines/D2STGNN/run.sh b/baselines/D2STGNN/run.sh
deleted file mode 100644
index 680dbff7..00000000
--- a/baselines/D2STGNN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/D2STGNN/METR-LA.py --gpus '2'
-python experiments/train.py -c baselines/D2STGNN/PEMS-BAY.py --gpus '2'
-python experiments/train.py -c baselines/D2STGNN/PEMS03.py --gpus '2'
-python experiments/train.py -c baselines/D2STGNN/PEMS04.py --gpus '2'
-python experiments/train.py -c baselines/D2STGNN/PEMS07.py --gpus '2'
-python experiments/train.py -c baselines/D2STGNN/PEMS08.py --gpus '2'
diff --git a/baselines/DCRNN/run.sh b/baselines/DCRNN/run.sh
deleted file mode 100644
index 679b7b88..00000000
--- a/baselines/DCRNN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/DCRNN/METR-LA.py --gpus '1'
-python experiments/train.py -c baselines/DCRNN/PEMS-BAY.py --gpus '1'
-python experiments/train.py -c baselines/DCRNN/PEMS03.py --gpus '1'
-python experiments/train.py -c baselines/DCRNN/PEMS04.py --gpus '1'
-python experiments/train.py -c baselines/DCRNN/PEMS07.py --gpus '1'
-python experiments/train.py -c baselines/DCRNN/PEMS08.py --gpus '1'
diff --git a/baselines/DGCRN/run.sh b/baselines/DGCRN/run.sh
deleted file mode 100644
index 1b108cb2..00000000
--- a/baselines/DGCRN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/DGCRN/METR-LA.py --gpus '2'
-python experiments/train.py -c baselines/DGCRN/PEMS-BAY.py --gpus '2'
-python experiments/train.py -c baselines/DGCRN/PEMS03.py --gpus '2'
-python experiments/train.py -c baselines/DGCRN/PEMS04.py --gpus '2'
-python experiments/train.py -c baselines/DGCRN/PEMS07.py --gpus '2'
-python experiments/train.py -c baselines/DGCRN/PEMS08.py --gpus '2'
diff --git a/baselines/DLinear/PEMS04.py b/baselines/DLinear/PEMS04_LTSF.py
similarity index 91%
rename from baselines/DLinear/PEMS04.py
rename to baselines/DLinear/PEMS04_LTSF.py
index a74f5035..f7ecca9e 100644
--- a/baselines/DLinear/PEMS04.py
+++ b/baselines/DLinear/PEMS04_LTSF.py
@@ -3,7 +3,7 @@
 from easydict import EasyDict
 sys.path.append(os.path.abspath(__file__ + '/../../..'))
 
-from basicts.metrics import masked_mae, masked_mse
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
 from basicts.data import TimeSeriesForecastingDataset
 from basicts.runners import SimpleTimeSeriesForecastingRunner
 from basicts.scaler import ZScoreScaler
@@ -15,8 +15,10 @@
 # Dataset & Metrics configuration
 DATA_NAME = 'PEMS04'  # Dataset name
 regular_settings = get_regular_settings(DATA_NAME)
-INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
-OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
 TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
 NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
 RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
@@ -78,7 +80,8 @@
 # Metrics settings
 CFG.METRICS.FUNCS = EasyDict({
                                 'MAE': masked_mae,
-                                'MSE': masked_mse
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
                             })
 CFG.METRICS.TARGET = 'MAE'
 CFG.METRICS.NULL_VAL = NULL_VAL
@@ -131,6 +134,6 @@
 CFG.EVAL = EasyDict()
 
 # Evaluation parameters
-CFG.EVAL.HORIZONS = [3, 6, 12]
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
 CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
 CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 #  stopping patience. Default: None. If not specified, the  stopping will not be used.
diff --git a/baselines/DLinear/METR-LA.py b/baselines/DLinear/PEMS08_LTSF.py
similarity index 89%
rename from baselines/DLinear/METR-LA.py
rename to baselines/DLinear/PEMS08_LTSF.py
index 20d120dd..fd922342 100644
--- a/baselines/DLinear/METR-LA.py
+++ b/baselines/DLinear/PEMS08_LTSF.py
@@ -3,7 +3,7 @@
 from easydict import EasyDict
 sys.path.append(os.path.abspath(__file__ + '/../../..'))
 
-from basicts.metrics import masked_mae, masked_mse
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
 from basicts.data import TimeSeriesForecastingDataset
 from basicts.runners import SimpleTimeSeriesForecastingRunner
 from basicts.scaler import ZScoreScaler
@@ -13,10 +13,12 @@
 
 ############################## Hot Parameters ##############################
 # Dataset & Metrics configuration
-DATA_NAME = 'METR-LA'  # Dataset name
+DATA_NAME = 'PEMS08'  # Dataset name
 regular_settings = get_regular_settings(DATA_NAME)
-INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
-OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
 TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
 NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
 RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
@@ -27,7 +29,7 @@
     "seq_len": INPUT_LEN,
     "pred_len": OUTPUT_LEN,
     "individual": False,
-    "enc_in": 207
+    "enc_in": 170
 }
 NUM_EPOCHS = 100
 
@@ -78,7 +80,8 @@
 # Metrics settings
 CFG.METRICS.FUNCS = EasyDict({
                                 'MAE': masked_mae,
-                                'MSE': masked_mse
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
                             })
 CFG.METRICS.TARGET = 'MAE'
 CFG.METRICS.NULL_VAL = NULL_VAL
@@ -131,6 +134,6 @@
 CFG.EVAL = EasyDict()
 
 # Evaluation parameters
-CFG.EVAL.HORIZONS = [3, 6, 12]
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
 CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
 CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 #  stopping patience. Default: None. If not specified, the  stopping will not be used.
diff --git a/baselines/DLinear/run.sh b/baselines/DLinear/run.sh
deleted file mode 100644
index 910b0972..00000000
--- a/baselines/DLinear/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/DLinear/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/Weather.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/DLinear/PEMS08.py --gpus '0'
diff --git a/baselines/DSFormer/PEMS04_LTSF.py b/baselines/DSFormer/PEMS04_LTSF.py
new file mode 100644
index 00000000..66afe847
--- /dev/null
+++ b/baselines/DSFormer/PEMS04_LTSF.py
@@ -0,0 +1,145 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DSFormer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DSFormer
+NUM_NODES = 307
+MODEL_PARAM = {
+    "Input_len": INPUT_LEN,
+    "out_len": OUTPUT_LEN,
+    "num_id": NUM_NODES,
+    "num_layer": 1,
+    "dropout": 0.3,
+    "muti_head": 2,
+    "num_samp": 3,
+    "IF_node": True,
+    "IF_REVIN":True
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.002,
+    "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 10, 25, 50, 75, 100],
+    "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 32
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/DSFormer/PEMS08_LTSF.py b/baselines/DSFormer/PEMS08_LTSF.py
new file mode 100644
index 00000000..3611ae3c
--- /dev/null
+++ b/baselines/DSFormer/PEMS08_LTSF.py
@@ -0,0 +1,145 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DSFormer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DSFormer
+NUM_NODES = 170
+MODEL_PARAM = {
+    "Input_len": INPUT_LEN,
+    "out_len": OUTPUT_LEN,
+    "num_id": NUM_NODES,
+    "num_layer": 1,
+    "dropout": 0.3,
+    "muti_head": 2,
+    "num_samp": 3,
+    "IF_node": True,
+    "IF_REVIN":True
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.002,
+    "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 10, 25, 50, 75, 100],
+    "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 32
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/DSFormer/run.sh b/baselines/DSFormer/run.sh
deleted file mode 100644
index 0f28e838..00000000
--- a/baselines/DSFormer/run.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/DSFormer/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/Weather.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/Illness.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/Traffic.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/DSFormer/PEMS08.py --gpus '0'
diff --git a/baselines/DeepAR/ETTh1.py b/baselines/DeepAR/ETTh1.py
new file mode 100644
index 00000000..57631eea
--- /dev/null
+++ b/baselines/DeepAR/ETTh1.py
@@ -0,0 +1,129 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DeepAR
+from .runner import DeepARRunner
+from .loss import gaussian_loss
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTh1'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+    'cov_feat_size' : 2,
+    'embedding_size' : 32,
+    'hidden_size' : 64,
+    'num_layers': 3,
+    'use_ts_id'   : True,
+    'id_feat_size': 32,
+    'num_nodes': 7
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DeepARRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MSE': masked_mse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+    'lr':0.003,
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/DeepAR/ETTm1.py b/baselines/DeepAR/ETTm1.py
new file mode 100644
index 00000000..d9d13e0a
--- /dev/null
+++ b/baselines/DeepAR/ETTm1.py
@@ -0,0 +1,129 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DeepAR
+from .runner import DeepARRunner
+from .loss import gaussian_loss
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ETTm1'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+    'cov_feat_size' : 2,
+    'embedding_size' : 32,
+    'hidden_size' : 64,
+    'num_layers': 3,
+    'use_ts_id'   : True,
+    'id_feat_size': 32,
+    'num_nodes': 7
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DeepARRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MSE': masked_mse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+    'lr':0.003,
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/DeepAR/Electricity.py b/baselines/DeepAR/Electricity.py
new file mode 100644
index 00000000..dd5c28e5
--- /dev/null
+++ b/baselines/DeepAR/Electricity.py
@@ -0,0 +1,129 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DeepAR
+from .runner import DeepARRunner
+from .loss import gaussian_loss
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Electricity'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+    'cov_feat_size' : 2,
+    'embedding_size' : 32,
+    'hidden_size' : 64,
+    'num_layers': 3,
+    'use_ts_id'   : True,
+    'id_feat_size': 32,
+    'num_nodes': 321
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DeepARRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MSE': masked_mse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+    'lr':0.003,
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/DeepAR/ExchangeRate.py b/baselines/DeepAR/ExchangeRate.py
new file mode 100644
index 00000000..eda9cffd
--- /dev/null
+++ b/baselines/DeepAR/ExchangeRate.py
@@ -0,0 +1,130 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DeepAR
+from .runner import DeepARRunner
+from .loss import gaussian_loss
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'ExchangeRate'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+NUM_NODES = 8 # number of time series (nodes) in the dataset
+MODEL_PARAM = {
+    'cov_feat_size' : 2,
+    'embedding_size' : 32,
+    'hidden_size' : 64,
+    'num_layers': 3,
+    'use_ts_id'   : True,
+    'id_feat_size': 32,
+    'num_nodes': NUM_NODES # must match the node axis of the data: DeepAR concatenates a [num_nodes, id_feat_size] id embedding per node
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DeepARRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MSE': masked_mse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+    'lr':0.003,
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/DeepAR/PEMS04.py b/baselines/DeepAR/PEMS04.py
new file mode 100644
index 00000000..d3f1f9e2
--- /dev/null
+++ b/baselines/DeepAR/PEMS04.py
@@ -0,0 +1,130 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DeepAR
+from .runner import DeepARRunner
+from .loss import gaussian_loss
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+    'cov_feat_size' : 2,
+    'embedding_size' : 32,
+    'hidden_size' : 64,
+    'num_layers': 3,
+    'use_ts_id'   : True,
+    'id_feat_size': 32,
+    'num_nodes': 307
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DeepARRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+    'lr':0.003,
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 32
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 16
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/DeepAR/PEMS04_LTSF.py b/baselines/DeepAR/PEMS04_LTSF.py
new file mode 100644
index 00000000..be949233
--- /dev/null
+++ b/baselines/DeepAR/PEMS04_LTSF.py
@@ -0,0 +1,132 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DeepAR
+from .runner import DeepARRunner
+from .loss import gaussian_loss
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 96 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+    'cov_feat_size' : 2,
+    'embedding_size' : 32,
+    'hidden_size' : 64,
+    'num_layers': 3,
+    'use_ts_id'   : True,
+    'id_feat_size': 32,
+    'num_nodes': 307
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DeepARRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+    'lr':0.001,
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 32
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 16
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/DeepAR/PEMS08_LTSF.py b/baselines/DeepAR/PEMS08_LTSF.py
new file mode 100644
index 00000000..d1972bc3
--- /dev/null
+++ b/baselines/DeepAR/PEMS08_LTSF.py
@@ -0,0 +1,132 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DeepAR
+from .runner import DeepARRunner
+from .loss import gaussian_loss
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 96 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+    'cov_feat_size' : 2,
+    'embedding_size' : 32,
+    'hidden_size' : 64,
+    'num_layers': 3,
+    'use_ts_id'   : True,
+    'id_feat_size': 32,
+    'num_nodes': 170
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DeepARRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+    'lr':0.003,
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 16
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 16
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = False # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/DeepAR/Weather.py b/baselines/DeepAR/Weather.py
new file mode 100644
index 00000000..fb4c1c2c
--- /dev/null
+++ b/baselines/DeepAR/Weather.py
@@ -0,0 +1,129 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import DeepAR
+from .runner import DeepARRunner
+from .loss import gaussian_loss
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'Weather'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = DeepAR
+MODEL_PARAM = {
+    'cov_feat_size' : 2,
+    'embedding_size' : 32,
+    'hidden_size' : 64,
+    'num_layers': 3,
+    'use_ts_id'   : True,
+    'id_feat_size': 32,
+    'num_nodes': 21
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = DeepARRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MSE': masked_mse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = gaussian_loss
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = 'Adam'
+CFG.TRAIN.OPTIM.PARAM = {
+    'lr':0.003,
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/DeepAR/arch/__init__.py b/baselines/DeepAR/arch/__init__.py
new file mode 100644
index 00000000..6ec10582
--- /dev/null
+++ b/baselines/DeepAR/arch/__init__.py
@@ -0,0 +1 @@
+from .deepar import DeepAR
\ No newline at end of file
diff --git a/baselines/DeepAR/arch/deepar.py b/baselines/DeepAR/arch/deepar.py
new file mode 100644
index 00000000..14a66cc5
--- /dev/null
+++ b/baselines/DeepAR/arch/deepar.py
@@ -0,0 +1,100 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .distributions import Gaussian
+
+
+class DeepAR(nn.Module):
+    """Autoregressive LSTM forecaster: each step predicts a Gaussian (mu, sigma) for the target and draws a sample from it.
+    Paper: DeepAR: Probabilistic Forecasting with Autoregressive Recurrent Networks; Link: https://arxiv.org/abs/1704.04110; Ref Code: https://github.com/jingw2/demand_forecast, https://github.com/husnejahan/DeepAR-pytorch, https://github.com/arrigonialberto86/deepar.
+    """
+
+    def __init__(self, cov_feat_size, embedding_size, hidden_size, num_layers, use_ts_id, id_feat_size=0, num_nodes=0) -> None:
+        """Init DeepAR.
+
+        Args:
+            cov_feat_size (int): covariate feature size (e.g. time in day, day in week, etc.).
+            embedding_size (int): output size of the input embedding layer.
+            hidden_size (int): hidden size of the LSTM.
+            num_layers (int): number of LSTM layers.
+            use_ts_id (bool): whether to use time series id to construct spatial id embedding as additional features.
+            id_feat_size (int, optional): size of the spatial id embedding. Must be > 0 if use_ts_id is True; ignored otherwise. Defaults to 0.
+            num_nodes (int, optional): number of nodes. Must be > 0 if use_ts_id is True. Defaults to 0.
+        """
+        super().__init__()
+        self.use_ts_id = use_ts_id
+        # input embedding layer: maps the scalar target value (last dim 1) to embedding_size
+        self.input_embed = nn.Linear(1, embedding_size)
+        # spatial id embedding layer: one learnable id_feat_size vector per node
+        if use_ts_id:
+            assert id_feat_size > 0, "id_feat_size must be greater than 0 if use_ts_id is True"
+            assert num_nodes > 0, "num_nodes must be greater than 0 if use_ts_id is True"
+            self.id_feat = nn.Parameter(torch.empty(num_nodes, id_feat_size))
+            nn.init.xavier_uniform_(self.id_feat)
+        else:
+            id_feat_size = 0
+        # the LSTM layer; its input concatenates the target embedding, the covariates and (optionally) the id embedding
+        self.encoder = nn.LSTM(embedding_size+cov_feat_size+id_feat_size, hidden_size, num_layers, bias=True, batch_first=True)
+        # the likelihood function: projects a hidden state to (mu, sigma) with one output each
+        self.likelihood_layer = Gaussian(hidden_size, 1)
+
+    def gaussian_sample(self, mu, sigma):
+        """Return one sample per element drawn from N(mu, sigma), after squeezing dim 1 of both inputs.
+
+        Args:
+            mu (torch.Tensor): mean values of distributions.
+            sigma (torch.Tensor): std values of distributions.
+        """
+        mu = mu.squeeze(1)
+        sigma = sigma.squeeze(1)
+        gaussian = torch.distributions.Normal(mu, sigma)
+        ypred = gaussian.sample([1]).squeeze(0)
+        return ypred
+
+    def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, train: bool, **kwargs) -> torch.Tensor:
+        """Feed forward of DeepAR. Reference code: https://github.com/jingw2/demand_forecast/blob/master/deepar.py
+
+        Args:
+            history_data (torch.Tensor): history data. [B, L_in, N, C]; channel 0 is the target, the remaining channels are covariates.
+            future_data (torch.Tensor): future data. [B, L_out, N, C], same channel layout.
+            train (bool): if True, every step is fed the ground-truth previous value; if False, steps with t > L_in are fed the previous sample.
+        Returns: dict with "prediction" (samples), "target", "mus" and "sigmas", each [B, L_in+L_out-1, N, 1] (a dict, despite the Tensor annotation).
+        """
+        history_next = None
+        preds = []
+        mus = []
+        sigmas = []
+        len_in, len_out = history_data.shape[1], future_data.shape[1]
+        B, _, N, C = history_data.shape
+        input_feat_full = torch.cat([history_data[:, :, :, 0:1], future_data[:, :, :, 0:1]], dim=1) # B, L_in+L_out, N, 1
+        covar_feat_full = torch.cat([history_data[:, :, :, 1:], future_data[:, :, :, 1:]], dim=1) # B, L_in+L_out, N, C-1
+
+        for t in range(1, len_in + len_out):
+            if not (t > len_in and not train): # feed the ground truth, except when decoding (t > len_in) at inference
+                history_next = input_feat_full[:, t-1:t, :, 0:1]
+            embed_feat = self.input_embed(history_next)
+            covar_feat = covar_feat_full[:, t:t+1, :, :]
+            if self.use_ts_id:
+                id_feat = self.id_feat.unsqueeze(0).expand(history_data.shape[0], -1, -1).unsqueeze(1)
+                encoder_input = torch.cat([embed_feat, covar_feat, id_feat], dim=-1)
+            else:
+                encoder_input = torch.cat([embed_feat, covar_feat], dim=-1)
+            # lstm: each node is fed as its own length-1 sequence; (h, c) carries the state over from the previous step
+            B, _, N, C = encoder_input.shape # _ is 1
+            encoder_input = encoder_input.transpose(1, 2).reshape(B * N, -1, C)
+            _, (h, c) = self.encoder(encoder_input) if t == 1 else self.encoder(encoder_input, (h, c))
+            # distribution proj: Gaussian parameters from the last LSTM layer's hidden state
+            mu, sigma = self.likelihood_layer(F.relu(h[-1, :, :]))
+            history_next = self.gaussian_sample(mu, sigma).view(B, N).view(B, 1, N, 1)
+            mus.append(mu.view(B, N, 1).unsqueeze(1))
+            sigmas.append(sigma.view(B, N, 1).unsqueeze(1))
+            preds.append(history_next)
+            assert not torch.isnan(history_next).any()
+
+        preds = torch.concat(preds, dim=1)
+        mus = torch.concat(mus, dim=1)
+        sigmas = torch.concat(sigmas, dim=1)
+        reals = input_feat_full[:, -preds.shape[1]:, :, :]
+
+        return {"prediction": preds, "target": reals, "mus": mus, "sigmas": sigmas}
diff --git a/baselines/DeepAR/arch/distributions.py b/baselines/DeepAR/arch/distributions.py
new file mode 100644
index 00000000..0c84d512
--- /dev/null
+++ b/baselines/DeepAR/arch/distributions.py
@@ -0,0 +1,22 @@
+import torch
+import torch.nn as nn
+
+
+class Gaussian(nn.Module):
+    """Gaussian likelihood head for continuous data: two linear layers map a hidden state to (mu, sigma)."""
+    def __init__(self, hidden_size, output_size):
+        """Create the mu and sigma projections.
+
+        Args:
+            hidden_size (int): hidden h_{i,t} column size
+            output_size (int): size of the last dimension of the returned mu and sigma
+        """
+        super(Gaussian, self).__init__()
+        self.mu_layer = nn.Linear(hidden_size, output_size)
+        self.sigma_layer = nn.Linear(hidden_size, output_size)
+
+    def forward(self, h):
+        """Return (mu, sigma) for h, both squeezed on dim 0; sigma = softplus(.) + 1e-6 > 0, without the overflow of log(1 + exp(x))."""
+        sigma_t = (nn.functional.softplus(self.sigma_layer(h)) + 1e-6).squeeze(0)
+        mu_t = self.mu_layer(h).squeeze(0)
+        return mu_t, sigma_t
diff --git a/baselines/DeepAR/loss/__init__.py b/baselines/DeepAR/loss/__init__.py
new file mode 100644
index 00000000..9b08b8a3
--- /dev/null
+++ b/baselines/DeepAR/loss/__init__.py
@@ -0,0 +1 @@
+from .gaussian import gaussian_loss
\ No newline at end of file
diff --git a/baselines/DeepAR/loss/gaussian.py b/baselines/DeepAR/loss/gaussian.py
new file mode 100644
index 00000000..9d5c5b96
--- /dev/null
+++ b/baselines/DeepAR/loss/gaussian.py
@@ -0,0 +1,29 @@
+import torch
+import numpy as np
+
+
+def gaussian_loss(prediction, target, mus, sigmas, null_val = np.nan):
+    """Masked Gaussian negative log-likelihood. The loss is calculated from mu, sigma, and target only. The prediction is sampled from N(mu, sigma) and is not used here (it is used in the metrics calculation).
+
+    Args:
+        prediction (torch.Tensor): prediction of model. [B, L, N, 1].
+        target (torch.Tensor): ground truth. [B, L, N, 1].
+        mus (torch.Tensor): the mean of gaussian distribution. [B, L, N, 1].
+        sigmas (torch.Tensor): the std of gaussian distribution. [B, L, N, 1]
+        null_val (optional): null value; matching targets are excluded from the loss. Defaults to np.nan.
+    """
+    # valid is True where the target is a real observation (neither NaN nor null_val)
+    if np.isnan(null_val):
+        valid = ~torch.isnan(target)
+    else:
+        eps = 5e-5
+        valid = ~torch.isclose(target, torch.full_like(target, null_val), atol=eps, rtol=0.)
+    mask = valid.float()
+    mask /= torch.mean(mask)
+    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)  # all-null batch: 0/0 -> 0
+    # Null targets are replaced by mu before log_prob: a NaN target would otherwise be rejected by Normal or give NaN * 0 = NaN.
+    distribution = torch.distributions.Normal(mus, sigmas)
+    likelihood = distribution.log_prob(torch.where(valid, target, mus.detach()))
+    likelihood = likelihood * mask
+    loss_g = -torch.mean(likelihood)
+    return loss_g
diff --git a/baselines/DeepAR/runner/__init__.py b/baselines/DeepAR/runner/__init__.py
new file mode 100644
index 00000000..1e41b855
--- /dev/null
+++ b/baselines/DeepAR/runner/__init__.py
@@ -0,0 +1 @@
+from .deepar_runner import DeepARRunner
diff --git a/baselines/DeepAR/runner/deepar_runner.py b/baselines/DeepAR/runner/deepar_runner.py
new file mode 100644
index 00000000..6f132ba8
--- /dev/null
+++ b/baselines/DeepAR/runner/deepar_runner.py
@@ -0,0 +1,171 @@
+import os
+import json
+from typing import Dict, Optional
+
+import torch
+import numpy as np
+from tqdm import tqdm
+from easytorch.utils.dist import master_only
+
+from basicts.runners import BaseTimeSeriesForecastingRunner
+
+
+class DeepARRunner(BaseTimeSeriesForecastingRunner):
+    """Runner for the DeepAR baseline.
+
+    Besides 'prediction', 'target' and 'inputs', the forward return may carry
+    'mus' and 'sigmas'; `postprocessing` rescales them together with the other
+    entries when they are present.
+    """
+
+    def __init__(self, cfg: dict):
+        super().__init__(cfg)
+        # feature (channel) indices; None when the config does not set them
+        self.forward_features = cfg["MODEL"].get("FORWARD_FEATURES", None)
+        self.target_features = cfg["MODEL"].get("TARGET_FEATURES", None)
+        # number of trailing time steps kept for evaluation in `test`
+        self.output_seq_len = cfg["DATASET"]["PARAM"]["output_len"]
+
+    def select_input_features(self, data: torch.Tensor) -> torch.Tensor:
+        """Select input features and reshape data to fit the target model.
+
+        Args:
+            data (torch.Tensor): input history data, shape [B, L, N, C].
+
+        Returns:
+            torch.Tensor: reshaped data
+        """
+
+        # select feature using self.forward_features
+        if self.forward_features is not None:
+            data = data[:, :, :, self.forward_features]
+        return data
+
+    def select_target_features(self, data: torch.Tensor) -> torch.Tensor:
+        """Select target features and reshape data back to the BasicTS framework
+
+        Args:
+            data (torch.Tensor): prediction of the model with arbitrary shape.
+
+        Returns:
+            torch.Tensor: reshaped data with shape [B, L, N, C]
+        """
+
+        # select feature using self.target_features; indexing with None would
+        # insert a new axis instead of keeping all features, so skip it then
+        if self.target_features is not None:
+            data = data[:, :, :, self.target_features]
+        return data
+
+    def postprocessing(self, input_data: Dict) -> Dict:
+        """Postprocess data.
+
+        Args:
+            input_data (Dict): Dictionary containing data to be processed.
+
+        Returns:
+            Dict: Processed data.
+        """
+
+        if self.scaler is not None and self.scaler.rescale:
+            input_data['prediction'] = self.scaler.inverse_transform(input_data['prediction'])
+            input_data['target'] = self.scaler.inverse_transform(input_data['target'])
+            input_data['inputs'] = self.scaler.inverse_transform(input_data['inputs'])
+            if "mus" in input_data.keys():
+                input_data['mus'] = self.scaler.inverse_transform(input_data['mus'])
+            # NOTE(review): sigmas go through the same inverse_transform as mus; if
+            # the scaler also shifts the data (e.g. adds a mean), the result is no
+            # longer a pure rescaling of the standard deviation -- confirm.
+            if "sigmas" in input_data.keys():
+                input_data['sigmas'] = self.scaler.inverse_transform(input_data['sigmas'])
+        # TODO: add more postprocessing steps as needed.
+        return input_data
+
+    @torch.no_grad()
+    @master_only
+    def test(self, train_epoch: Optional[int] = None, save_metrics: bool = False, save_results: bool = False) -> Dict:
+        """Test process.
+
+        Args:
+            train_epoch (Optional[int]): Current epoch if in training process.
+            save_metrics (bool): Save the test metrics. Defaults to False.
+            save_results (bool): Save the test results. Defaults to False.
+
+        Returns:
+            Dict: concatenated 'prediction', 'target' and 'inputs' of the test set;
+                'prediction' and 'target' keep only the last output_seq_len steps.
+        """
+
+        prediction, target, inputs = [], [], []
+
+        for data in tqdm(self.test_data_loader):
+            data = self.preprocessing(data)
+            forward_return = self.forward(data, epoch=None, iter_num=None, train=False)
+            forward_return = self.postprocessing(forward_return)
+
+            if not self.if_evaluate_on_gpu:
+                forward_return['prediction'] = forward_return['prediction'].detach().cpu()
+                forward_return['target'] = forward_return['target'].detach().cpu()
+                forward_return['inputs'] = forward_return['inputs'].detach().cpu()
+
+            prediction.append(forward_return['prediction'])
+            target.append(forward_return['target'])
+            inputs.append(forward_return['inputs'])
+
+        prediction = torch.cat(prediction, dim=0)
+        target = torch.cat(target, dim=0)
+        inputs = torch.cat(inputs, dim=0)
+
+        # evaluate on the last output_seq_len steps only
+        returns_all = {'prediction': prediction[:, -self.output_seq_len:, :, :],
+                        'target': target[:, -self.output_seq_len:, :, :],
+                        'inputs': inputs}
+        metrics_results = self.compute_evaluation_metrics(returns_all)
+
+        # save
+        if save_results:
+            # save returns_all to self.ckpt_save_dir/test_results.npz
+            test_results = {k: v.cpu().numpy() for k, v in returns_all.items()}
+            np.savez(os.path.join(self.ckpt_save_dir, 'test_results.npz'), **test_results)
+
+        if save_metrics:
+            # save metrics_results to self.ckpt_save_dir/test_metrics.json
+            with open(os.path.join(self.ckpt_save_dir, 'test_metrics.json'), 'w') as f:
+                json.dump(metrics_results, f, indent=4)
+
+        return returns_all
+
+    def forward(self, data: Dict, epoch: Optional[int] = None, iter_num: Optional[int] = None, train: bool = True, **kwargs) -> Dict:
+        """feed forward process for train, val, and test. Note that the outputs are NOT re-scaled.
+
+        Args:
+            data (Dict): batch with keys 'target' (future data) and 'inputs' (history data). [B, L, N, C] for each of them
+            epoch (int, optional): epoch number. Defaults to None.
+            iter_num (int, optional): iteration number. Defaults to None.
+            train (bool, optional): if in the training process. Defaults to True.
+
+        Returns:
+            dict: keys that must be included: inputs, prediction, target
+        """
+
+        # Preprocess input data
+        future_data, history_data = data['target'], data['inputs']
+        history_data = self.to_running_device(history_data)  # Shape: [B, L, N, C]
+        future_data = self.to_running_device(future_data)    # Shape: [B, L, N, C]
+
+        # Select input features
+        history_data = self.select_input_features(history_data)
+        future_data_4_dec = self.select_input_features(future_data)
+
+        # Forward pass through the model
+        model_return = self.model(history_data=history_data, future_data=future_data_4_dec,
+                                  batch_seen=iter_num, epoch=epoch, train=train)
+
+        # parse model return: a bare tensor is wrapped into the dict format
+        if isinstance(model_return, torch.Tensor):
+            model_return = {"prediction": model_return}
+        # history_data has already been reduced to the forward features here
+        model_return["inputs"] = self.select_target_features(history_data)
+        if "target" not in model_return:
+            model_return["target"] = self.select_target_features(future_data)
+        return model_return
diff --git a/baselines/FEDformer/PEMS04_LTSF.py b/baselines/FEDformer/PEMS04_LTSF.py
new file mode 100644
index 00000000..d0b84408
--- /dev/null
+++ b/baselines/FEDformer/PEMS04_LTSF.py
@@ -0,0 +1,157 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import FEDformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 720 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = FEDformer
+NUM_NODES = 307
+MODEL_PARAM = {
+    "enc_in": NUM_NODES,                        # num nodes
+    "dec_in": NUM_NODES,
+    "c_out": NUM_NODES,
+    "seq_len": INPUT_LEN,           # input sequence length
+    "label_len": INPUT_LEN // 2,    # start token length used in decoder (floor division keeps it an int)
+    "pred_len": OUTPUT_LEN,         # prediction sequence length
+    "d_model": 512,
+    "version": "Fourier",                       # for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]
+    "moving_avg": 24,                           # window size of moving average
+    "n_heads": 8,
+    "e_layers": 2,                              # num of encoder layers
+    "d_layers": 1,                              # num of decoder layers
+    "d_ff": 2048,
+    "dropout": 0.05,
+    "output_attention": False,
+    "embed": "timeF",                           # [timeF, fixed, learned]
+    "mode_select": "random",                    # for FEDformer, there are two mode selection method, options: [random, low]
+    "modes": 64,                                # modes to be selected random 64
+    "base": "legendre",                         # mwt base
+    "L": 3,                                     # ignore level
+    "cross_activation": "tanh",                 # mwt cross attention activation function tanh or softmax
+    "activation": "gelu",
+    "num_time_features": 2,                     # number of used time features
+    "time_of_day_size": 288,
+    "day_of_week_size": 7
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.0005,
+    "weight_decay": 0.0005,
+}
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 50],
+    "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/FEDformer/PEMS08_LTSF.py b/baselines/FEDformer/PEMS08_LTSF.py
new file mode 100644
index 00000000..a1154f01
--- /dev/null
+++ b/baselines/FEDformer/PEMS08_LTSF.py
@@ -0,0 +1,157 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import FEDformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 720 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = FEDformer
+NUM_NODES = 170
+MODEL_PARAM = {
+    "enc_in": NUM_NODES,                        # num nodes
+    "dec_in": NUM_NODES,
+    "c_out": NUM_NODES,
+    "seq_len": INPUT_LEN,           # input sequence length
+    "label_len": INPUT_LEN // 2,    # start token length used in decoder (floor division keeps it an int)
+    "pred_len": OUTPUT_LEN,         # prediction sequence length
+    "d_model": 512,
+    "version": "Fourier",                       # for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]
+    "moving_avg": 24,                           # window size of moving average
+    "n_heads": 8,
+    "e_layers": 2,                              # num of encoder layers
+    "d_layers": 1,                              # num of decoder layers
+    "d_ff": 2048,
+    "dropout": 0.05,
+    "output_attention": False,
+    "embed": "timeF",                           # [timeF, fixed, learned]
+    "mode_select": "random",                    # for FEDformer, there are two mode selection method, options: [random, low]
+    "modes": 64,                                # modes to be selected random 64
+    "base": "legendre",                         # mwt base
+    "L": 3,                                     # ignore level
+    "cross_activation": "tanh",                 # mwt cross attention activation function tanh or softmax
+    "activation": "gelu",
+    "num_time_features": 2,                     # number of used time features
+    "time_of_day_size": 288,
+    "day_of_week_size": 7
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.0005,
+    "weight_decay": 0.0005,
+}
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 50],
+    "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/FEDformer/run.sh b/baselines/FEDformer/run.sh
deleted file mode 100644
index 3b656702..00000000
--- a/baselines/FEDformer/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/FEDformer/ETTh1.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/ETTh2.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/ETTm1.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/ETTm2.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/Electricity.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/ExchangeRate.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/Weather.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/PEMS04.py --gpus '2'
-python experiments/train.py -c baselines/FEDformer/PEMS08.py --gpus '2'
diff --git a/baselines/GTS/run.sh b/baselines/GTS/run.sh
deleted file mode 100644
index a989d902..00000000
--- a/baselines/GTS/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/GTS/METR-LA.py --gpus '1'
-python experiments/train.py -c baselines/GTS/PEMS-BAY.py --gpus '1'
-python experiments/train.py -c baselines/GTS/PEMS03.py --gpus '1'
-python experiments/train.py -c baselines/GTS/PEMS04.py --gpus '1'
-python experiments/train.py -c baselines/GTS/PEMS07.py --gpus '1'
-python experiments/train.py -c baselines/GTS/PEMS08.py --gpus '1'
diff --git a/baselines/GWNet/run.sh b/baselines/GWNet/run.sh
deleted file mode 100644
index e2e08381..00000000
--- a/baselines/GWNet/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/GWNet/METR-LA.py --gpus '1'
-python experiments/train.py -c baselines/GWNet/PEMS-BAY.py --gpus '1'
-python experiments/train.py -c baselines/GWNet/PEMS03.py --gpus '1'
-python experiments/train.py -c baselines/GWNet/PEMS04.py --gpus '1'
-python experiments/train.py -c baselines/GWNet/PEMS07.py --gpus '1'
-python experiments/train.py -c baselines/GWNet/PEMS08.py --gpus '1'
diff --git a/baselines/Informer/PEMS04_LTSF.py b/baselines/Informer/PEMS04_LTSF.py
new file mode 100644
index 00000000..750108bb
--- /dev/null
+++ b/baselines/Informer/PEMS04_LTSF.py
@@ -0,0 +1,161 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Informer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 96 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Informer
+NUM_NODES = 307
+MODEL_PARAM = {
+    "enc_in": NUM_NODES,                              # num nodes
+    "dec_in": NUM_NODES,
+    "c_out": NUM_NODES,
+    "seq_len": INPUT_LEN,           # input sequence length
+    "label_len": INPUT_LEN // 2,    # start token length used in decoder (floor division keeps it an int)
+    "out_len": OUTPUT_LEN,          # prediction sequence length
+    "factor": 3,                                # probsparse attn factor
+    "d_model": 512,
+    "n_heads": 8,
+    "e_layers": 2,                              # num of encoder layers
+    # "e_layers": [4, 2, 1],                    # for InformerStack
+    "d_layers": 1,                              # num of decoder layers
+    "d_ff": 2048,
+    "dropout": 0.05,
+    "attn": 'prob',                             # attention used in encoder, options:[prob, full]
+    "embed": "timeF",                           # [timeF, fixed, learned]
+    "activation": "gelu",
+    "output_attention": False,
+    "distil": True,                             # whether to use distilling in encoder, using this argument means not using distilling
+    "mix": True,                                # use mix attention in generative decoder
+    "num_time_features": 2,                     # number of used time features [time_of_day, day_of_week, day_of_month, day_of_year]
+    "time_of_day_size": 288,
+    "day_of_week_size": 7,
+    "day_of_month_size": 31,
+    "day_of_year_size": 366
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.002,
+    "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 25, 50],
+    "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/Informer/PEMS08_LTSF.py b/baselines/Informer/PEMS08_LTSF.py
new file mode 100644
index 00000000..a7466c28
--- /dev/null
+++ b/baselines/Informer/PEMS08_LTSF.py
@@ -0,0 +1,161 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Informer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 96 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Informer
+NUM_NODES = 170
+MODEL_PARAM = {
+    "enc_in": NUM_NODES,                              # num nodes
+    "dec_in": NUM_NODES,
+    "c_out": NUM_NODES,
+    "seq_len": INPUT_LEN,           # input sequence length
+    "label_len": INPUT_LEN // 2,    # start token length used in decoder (floor division keeps it an int)
+    "out_len": OUTPUT_LEN,          # prediction sequence length
+    "factor": 3,                                # probsparse attn factor
+    "d_model": 512,
+    "n_heads": 8,
+    "e_layers": 2,                              # num of encoder layers
+    # "e_layers": [4, 2, 1],                    # for InformerStack
+    "d_layers": 1,                              # num of decoder layers
+    "d_ff": 2048,
+    "dropout": 0.05,
+    "attn": 'prob',                             # attention used in encoder, options:[prob, full]
+    "embed": "timeF",                           # [timeF, fixed, learned]
+    "activation": "gelu",
+    "output_attention": False,
+    "distil": True,                             # whether to use distilling in encoder, using this argument means not using distilling
+    "mix": True,                                # use mix attention in generative decoder
+    "num_time_features": 2,                     # number of used time features [time_of_day, day_of_week, day_of_month, day_of_year]
+    "time_of_day_size": 288,
+    "day_of_week_size": 7,
+    "day_of_month_size": 31,
+    "day_of_year_size": 366
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.002,
+    "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 25, 50],
+    "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/Informer/run.sh b/baselines/Informer/run.sh
deleted file mode 100644
index db4bd1fa..00000000
--- a/baselines/Informer/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/Informer/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/Informer/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/Informer/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/Informer/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/Informer/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/Informer/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/Informer/Weather.py --gpus '0'
-python experiments/train.py -c baselines/Informer/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/Informer/PEMS08.py --gpus '0'
diff --git a/baselines/MTGNN/run.sh b/baselines/MTGNN/run.sh
deleted file mode 100644
index 529202f2..00000000
--- a/baselines/MTGNN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/MTGNN/METR-LA.py --gpus '3'
-python experiments/train.py -c baselines/MTGNN/PEMS-BAY.py --gpus '3'
-python experiments/train.py -c baselines/MTGNN/PEMS03.py --gpus '3'
-python experiments/train.py -c baselines/MTGNN/PEMS04.py --gpus '3'
-python experiments/train.py -c baselines/MTGNN/PEMS07.py --gpus '3'
-python experiments/train.py -c baselines/MTGNN/PEMS08.py --gpus '3'
diff --git a/baselines/NBeats/PEMS04_LTSF.py b/baselines/NBeats/PEMS04_LTSF.py
new file mode 100644
index 00000000..524caf07
--- /dev/null
+++ b/baselines/NBeats/PEMS04_LTSF.py
@@ -0,0 +1,143 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import NBeats
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NBeats
+NUM_NODES = 307
+MODEL_PARAM = {
+        "type": "generic",
+        "input_size": INPUT_LEN,
+        "output_size": OUTPUT_LEN,
+        "layer_size": 512,
+        "layers": 4,
+        "stacks": 10
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr":0.001,
+    "weight_decay":0,
+    "eps":1.0e-8,
+    "betas":(0.9, 0.95)
+}
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM= {
+    "milestones":[20, 40, 60, 80],
+    "gamma":0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    "max_norm": 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 32
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/NBeats/PEMS08_LTSF.py b/baselines/NBeats/PEMS08_LTSF.py
new file mode 100644
index 00000000..7bdc542b
--- /dev/null
+++ b/baselines/NBeats/PEMS08_LTSF.py
@@ -0,0 +1,143 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import NBeats
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NBeats
+NUM_NODES = 170
+MODEL_PARAM = {
+        "type": "generic",
+        "input_size": INPUT_LEN,
+        "output_size": OUTPUT_LEN,
+        "layer_size": 512,
+        "layers": 4,
+        "stacks": 10
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr":0.001,
+    "weight_decay":0,
+    "eps":1.0e-8,
+    "betas":(0.9, 0.95)
+}
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM= {
+    "milestones":[20, 40, 60, 80],
+    "gamma":0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    "max_norm": 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 32
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/NBeats/run.sh b/baselines/NBeats/run.sh
deleted file mode 100644
index 259cba60..00000000
--- a/baselines/NBeats/run.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/NBeats/METR-LA.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS-BAY.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS03.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS04.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS07.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS08.py --gpus '1'
-
-python experiments/train.py -c baselines/NBeats/ETTh1.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/ETTm1.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/Electricity.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/Weather.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/ExchangeRate.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS04_LTSF.py --gpus '1'
-python experiments/train.py -c baselines/NBeats/PEMS08_LTSF.py --gpus '1'
diff --git a/baselines/NLinear/ETTh1.py b/baselines/NLinear/ETTh1.py
index f8b64380..1ab4931b 100644
--- a/baselines/NLinear/ETTh1.py
+++ b/baselines/NLinear/ETTh1.py
@@ -26,7 +26,6 @@
 MODEL_PARAM = {
     "seq_len": INPUT_LEN,
     "pred_len": OUTPUT_LEN,
-    "individual": False,
     "enc_in": 7
 }
 NUM_EPOCHS = 100
diff --git a/baselines/NLinear/ETTh2.py b/baselines/NLinear/ETTh2.py
index 7cf7a3de..0e1860c3 100644
--- a/baselines/NLinear/ETTh2.py
+++ b/baselines/NLinear/ETTh2.py
@@ -26,7 +26,6 @@
 MODEL_PARAM = {
     "seq_len": INPUT_LEN,
     "pred_len": OUTPUT_LEN,
-    "individual": False,
     "enc_in": 7
 }
 NUM_EPOCHS = 100
diff --git a/baselines/NLinear/ETTm1.py b/baselines/NLinear/ETTm1.py
index 941c4205..289ea338 100644
--- a/baselines/NLinear/ETTm1.py
+++ b/baselines/NLinear/ETTm1.py
@@ -26,7 +26,6 @@
 MODEL_PARAM = {
     "seq_len": INPUT_LEN,
     "pred_len": OUTPUT_LEN,
-    "individual": False,
     "enc_in": 7
 }
 NUM_EPOCHS = 100
diff --git a/baselines/NLinear/ETTm2.py b/baselines/NLinear/ETTm2.py
index 68b14356..55c61c8c 100644
--- a/baselines/NLinear/ETTm2.py
+++ b/baselines/NLinear/ETTm2.py
@@ -26,7 +26,6 @@
 MODEL_PARAM = {
     "seq_len": INPUT_LEN,
     "pred_len": OUTPUT_LEN,
-    "individual": False,
     "enc_in": 7
 }
 NUM_EPOCHS = 100
diff --git a/baselines/NLinear/Electricity.py b/baselines/NLinear/Electricity.py
index 96738ea8..7c91c87c 100644
--- a/baselines/NLinear/Electricity.py
+++ b/baselines/NLinear/Electricity.py
@@ -26,7 +26,6 @@
 MODEL_PARAM = {
     "seq_len": INPUT_LEN,
     "pred_len": OUTPUT_LEN,
-    "individual": False,
     "enc_in": 321
 }
 NUM_EPOCHS = 100
diff --git a/baselines/NLinear/ExchangeRate.py b/baselines/NLinear/ExchangeRate.py
index 41979bb1..ed179712 100644
--- a/baselines/NLinear/ExchangeRate.py
+++ b/baselines/NLinear/ExchangeRate.py
@@ -26,7 +26,6 @@
 MODEL_PARAM = {
     "seq_len": INPUT_LEN,
     "pred_len": OUTPUT_LEN,
-    "individual": False,
     "enc_in": 8
 }
 NUM_EPOCHS = 100
diff --git a/baselines/NLinear/PEMS04_LTSF.py b/baselines/NLinear/PEMS04_LTSF.py
new file mode 100644
index 00000000..e4e93584
--- /dev/null
+++ b/baselines/NLinear/PEMS04_LTSF.py
@@ -0,0 +1,138 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import NLinear
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NLinear
+MODEL_PARAM = {
+    "seq_len": INPUT_LEN,
+    "pred_len": OUTPUT_LEN,
+    "enc_in": 307
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.0003,
+    "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 25],
+    "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/NLinear/PEMS08_LTSF.py b/baselines/NLinear/PEMS08_LTSF.py
new file mode 100644
index 00000000..e4551848
--- /dev/null
+++ b/baselines/NLinear/PEMS08_LTSF.py
@@ -0,0 +1,138 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import NLinear
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = NLinear
+MODEL_PARAM = {
+    "seq_len": INPUT_LEN,
+    "pred_len": OUTPUT_LEN,
+    "enc_in": 170
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.0003,
+    "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 25],
+    "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/NLinear/Weather.py b/baselines/NLinear/Weather.py
index 5002e73a..9cae044d 100644
--- a/baselines/NLinear/Weather.py
+++ b/baselines/NLinear/Weather.py
@@ -26,7 +26,6 @@
 MODEL_PARAM = {
     "seq_len": INPUT_LEN,
     "pred_len": OUTPUT_LEN,
-    "individual": False,
     "enc_in": 21
 }
 NUM_EPOCHS = 100
diff --git a/baselines/NLinear/run.sh b/baselines/NLinear/run.sh
deleted file mode 100644
index b291051c..00000000
--- a/baselines/NLinear/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/NLinear/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/Weather.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/NLinear/PEMS08.py --gpus '0'
diff --git a/baselines/PatchTST/PEMS04_LTSF.py b/baselines/PatchTST/PEMS04_LTSF.py
new file mode 100644
index 00000000..bf2ddfe9
--- /dev/null
+++ b/baselines/PatchTST/PEMS04_LTSF.py
@@ -0,0 +1,155 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import PatchTST
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = PatchTST
+NUM_NODES = 307
+MODEL_PARAM = {
+    "enc_in": NUM_NODES,                        # num nodes
+    "seq_len": INPUT_LEN,           # input sequence length
+    "pred_len": OUTPUT_LEN,         # prediction sequence length
+    "e_layers": 3,                              # num of encoder layers
+    "n_heads": 16,
+    "d_model": 128,
+    "d_ff": 256,
+    "dropout": 0.2,
+    "fc_dropout": 0.2,
+    "head_dropout": 0.0,
+    "patch_len": 32,
+    "stride": 16,
+    "individual": 0,                            # individual head; True 1 False 0
+    "padding_patch": "end",                     # None: None; end: padding on the end
+    "revin": 1,                                 # RevIN; True 1 False 0
+    "affine": 0,                                # RevIN-affine; True 1 False 0
+    "subtract_last": 0,                         # 0: subtract mean; 1: subtract last
+    "decomposition": 0,                         # decomposition; True 1 False 0
+    "kernel_size": 25,                          # decomposition-kernel
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.001,
+    "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 25, 50],
+    "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336]
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/PatchTST/PEMS08_LTSF.py b/baselines/PatchTST/PEMS08_LTSF.py
new file mode 100644
index 00000000..68ae229f
--- /dev/null
+++ b/baselines/PatchTST/PEMS08_LTSF.py
@@ -0,0 +1,155 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import PatchTST
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)  # Regular settings looked up by dataset name
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence (regular setting; replaced below for LTSF)
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence (regular setting; replaced below for LTSF)
+INPUT_LEN = 336 # LTSF: length of input sequence
+OUTPUT_LEN = 336 # LTSF: length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = PatchTST
+NUM_NODES = 170 # Number of nodes; passed to the model as "enc_in"
+MODEL_PARAM = {
+    "enc_in": NUM_NODES,                        # num nodes
+    "seq_len": INPUT_LEN,           # input sequence length
+    "pred_len": OUTPUT_LEN,         # prediction sequence length
+    "e_layers": 3,                              # num of encoder layers
+    "n_heads": 16,                              # presumably number of attention heads -- confirm against arch
+    "d_model": 128,                             # presumably model (embedding) width -- confirm against arch
+    "d_ff": 256,                                # presumably feed-forward width -- confirm against arch
+    "dropout": 0.2,
+    "fc_dropout": 0.2,
+    "head_dropout": 0.0,
+    "patch_len": 32,                            # presumably patch length in time steps -- confirm against arch
+    "stride": 16,                               # presumably step between consecutive patches -- confirm against arch
+    "individual": 0,                            # individual head; True 1 False 0
+    "padding_patch": "end",                     # None: None; end: padding on the end
+    "revin": 1,                                 # RevIN; True 1 False 0
+    "affine": 0,                                # RevIN-affine; True 1 False 0
+    "subtract_last": 0,                         # 0: subtract mean; 1: subtract last
+    "decomposition": 0,                         # decomposition; True 1 False 0
+    "kernel_size": 25,                          # decomposition-kernel
+    }
+NUM_EPOCHS = 100 # Assigned to CFG.TRAIN.NUM_EPOCHS and used in the checkpoint directory name
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.001,
+    "weight_decay": 0.0005,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 25, 50],
+    "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience (a CFG.TRAIN option). Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/PatchTST/run.sh b/baselines/PatchTST/run.sh
deleted file mode 100644
index 5802fb23..00000000
--- a/baselines/PatchTST/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/PatchTST/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/Weather.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/PatchTST/PEMS08.py --gpus '0'
diff --git a/baselines/Pyraformer/PEMS04_LTSF.py b/baselines/Pyraformer/PEMS04_LTSF.py
new file mode 100644
index 00000000..e8c9098b
--- /dev/null
+++ b/baselines/Pyraformer/PEMS04_LTSF.py
@@ -0,0 +1,158 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Pyraformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)  # Regular settings looked up by dataset name
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence (regular setting; replaced below for LTSF)
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence (regular setting; replaced below for LTSF)
+INPUT_LEN = 96 # LTSF: length of input sequence
+OUTPUT_LEN = 336 # LTSF: length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Pyraformer
+NUM_NODES = 307 # Number of nodes; passed to the model as "enc_in", "dec_in" and "c_out"
+MODEL_PARAM = {
+    "enc_in": NUM_NODES,                        # num nodes
+    "dec_in": NUM_NODES,
+    "c_out": NUM_NODES,
+    "input_size": INPUT_LEN,                    # input sequence length
+    "predict_step": OUTPUT_LEN,                 # prediction sequence length
+    "d_model": 512,
+    "d_inner_hid": 512,
+    "d_k": 128,
+    "d_v": 128,
+    "d_bottleneck": 128,
+    "n_head": 4,
+    "n_layer": 4,
+    "dropout": 0.05,
+    "decoder": "FC",                            # FC or attention
+    "window_size": "[2, 2, 2]",                 # NOTE(review): a string, not a list -- presumably parsed inside the arch; confirm
+    "inner_size": 5,
+    "CSCM": "Bottleneck_Construct",
+    "truncate": False,
+    "use_tvm": False,
+    "embed": "DataEmbedding",
+    "num_time_features": 2,                     # presumably the two extra channels in FORWARD_FEATURES [1, 2] -- confirm
+    "time_of_day_size": 288,                    # presumably number of time-of-day slots per day -- confirm against dataset
+    "day_of_week_size": 7,
+    }
+NUM_EPOCHS = 100 # Assigned to CFG.TRAIN.NUM_EPOCHS and used in the checkpoint directory name
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.0005
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 25],
+    "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience (a CFG.TRAIN option). Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/Pyraformer/PEMS08_LTSF.py b/baselines/Pyraformer/PEMS08_LTSF.py
new file mode 100644
index 00000000..de6a0537
--- /dev/null
+++ b/baselines/Pyraformer/PEMS08_LTSF.py
@@ -0,0 +1,158 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Pyraformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)  # Regular settings looked up by dataset name
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence (regular setting; replaced below for LTSF)
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence (regular setting; replaced below for LTSF)
+INPUT_LEN = 96 # LTSF: length of input sequence
+OUTPUT_LEN = 336 # LTSF: length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Pyraformer
+NUM_NODES = 170 # Number of nodes; passed to the model as "enc_in", "dec_in" and "c_out"
+MODEL_PARAM = {
+    "enc_in": NUM_NODES,                        # num nodes
+    "dec_in": NUM_NODES,
+    "c_out": NUM_NODES,
+    "input_size": INPUT_LEN,                    # input sequence length
+    "predict_step": OUTPUT_LEN,                 # prediction sequence length
+    "d_model": 512,
+    "d_inner_hid": 512,
+    "d_k": 128,
+    "d_v": 128,
+    "d_bottleneck": 128,
+    "n_head": 4,
+    "n_layer": 4,
+    "dropout": 0.05,
+    "decoder": "FC",                            # FC or attention
+    "window_size": "[2, 2, 2]",                 # NOTE(review): a string, not a list -- presumably parsed inside the arch; confirm
+    "inner_size": 5,
+    "CSCM": "Bottleneck_Construct",
+    "truncate": False,
+    "use_tvm": False,
+    "embed": "DataEmbedding",
+    "num_time_features": 2,                     # presumably the two extra channels in FORWARD_FEATURES [1, 2] -- confirm
+    "time_of_day_size": 288,                    # presumably number of time-of-day slots per day -- confirm against dataset
+    "day_of_week_size": 7,
+    }
+NUM_EPOCHS = 100 # Assigned to CFG.TRAIN.NUM_EPOCHS and used in the checkpoint directory name
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.0005
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 25],
+    "gamma": 0.5
+}
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    'max_norm': 5.0
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience (a CFG.TRAIN option). Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/Pyraformer/run.sh b/baselines/Pyraformer/run.sh
deleted file mode 100644
index b1e61c57..00000000
--- a/baselines/Pyraformer/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/Pyraformer/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/Weather.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/Pyraformer/PEMS08.py --gpus '0'
diff --git a/baselines/STAEformer/run.sh b/baselines/STAEformer/run.sh
deleted file mode 100644
index 1e92166e..00000000
--- a/baselines/STAEformer/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/STAEformer/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/STAEformer/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/STAEformer/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/STAEformer/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/STAEformer/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/STAEformer/PEMS08.py --gpus '0'
diff --git a/baselines/STGCN/run.sh b/baselines/STGCN/run.sh
deleted file mode 100644
index a84201f3..00000000
--- a/baselines/STGCN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/STGCN/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/STGCN/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/STGCN/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/STGCN/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/STGCN/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/STGCN/PEMS08.py --gpus '0'
diff --git a/baselines/STGODE/METR-LA copy.py b/baselines/STGODE/METR-LA copy.py
new file mode 100644
index 00000000..1efc8319
--- /dev/null
+++ b/baselines/STGODE/METR-LA copy.py	
@@ -0,0 +1,118 @@
+import os
+import sys
+
+# TODO: remove it when basicts can be installed by pip
+sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import torch
+from easydict import EasyDict
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.losses import masked_mae
+from basicts.utils import load_adj
+from .generate_matrices import generate_dtw_spa_matrix
+
+from .arch import STGODE
+
+CFG = EasyDict()
+
+# ================= general ================= #
+CFG.DESCRIPTION = "STGODE model configuration"
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+CFG.DATASET_CLS = TimeSeriesForecastingDataset
+CFG.DATASET_NAME = "METR-LA"
+CFG.DATASET_TYPE = "Traffic Speed"
+CFG.DATASET_INPUT_LEN = 12
+CFG.DATASET_OUTPUT_LEN = 12
+CFG.GPU_NUM = 1
+CFG.NULL_VAL = 0.0
+
+# ================= environment ================= #
+CFG.ENV = EasyDict()
+CFG.ENV.SEED = 1
+CFG.ENV.CUDNN = EasyDict()
+CFG.ENV.CUDNN.ENABLED = True
+
+# ================= model ================= #
+CFG.MODEL = EasyDict()
+CFG.MODEL.NAME = "STGODE"
+CFG.MODEL.ARCH = STGODE
+# NOTE(review): three-argument call here, but METR-LA.py calls generate_dtw_spa_matrix(DATA_NAME) -- confirm which signature is current
+A_se_wave, A_sp_wave = generate_dtw_spa_matrix(CFG.DATASET_NAME, CFG.DATASET_INPUT_LEN, CFG.DATASET_OUTPUT_LEN)
+CFG.MODEL.PARAM = {
+    "num_nodes": 207,
+    "num_features": 3,
+    "num_timesteps_input": 12,  # same value as CFG.DATASET_INPUT_LEN
+    "num_timesteps_output": 12,  # same value as CFG.DATASET_OUTPUT_LEN
+    "A_sp_hat" : A_sp_wave,  # second matrix returned by generate_dtw_spa_matrix
+    "A_se_hat" : A_se_wave  # first matrix returned by generate_dtw_spa_matrix
+}
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+# ================= optim ================= #
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.LOSS = masked_mae
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.002,
+    "weight_decay": 0.0001,
+}
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "StepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "step_size": 50,
+    "gamma": 0.5
+}
+
+# ================= train ================= #
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    "max_norm": 5.0
+}
+CFG.TRAIN.NUM_EPOCHS = 100
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    "checkpoints",
+    "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
+)
+# train data
+CFG.TRAIN.DATA = EasyDict()
+# read data
+CFG.TRAIN.DATA.DIR = "datasets/" + CFG.DATASET_NAME
+# dataloader args, optional
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.PREFETCH = False
+CFG.TRAIN.DATA.SHUFFLE = True
+CFG.TRAIN.DATA.NUM_WORKERS = 2
+CFG.TRAIN.DATA.PIN_MEMORY = False
+
+# ================= validate ================= #
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+# validating data
+CFG.VAL.DATA = EasyDict()
+# read data
+CFG.VAL.DATA.DIR = "datasets/" + CFG.DATASET_NAME
+# dataloader args, optional
+CFG.VAL.DATA.BATCH_SIZE = 64
+CFG.VAL.DATA.PREFETCH = False
+CFG.VAL.DATA.SHUFFLE = False
+CFG.VAL.DATA.NUM_WORKERS = 2
+CFG.VAL.DATA.PIN_MEMORY = False
+
+# ================= test ================= #
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+# test data
+CFG.TEST.DATA = EasyDict()
+# read data
+CFG.TEST.DATA.DIR = "datasets/" + CFG.DATASET_NAME
+# dataloader args, optional
+CFG.TEST.DATA.BATCH_SIZE = 64
+CFG.TEST.DATA.PREFETCH = False
+CFG.TEST.DATA.SHUFFLE = False
+CFG.TEST.DATA.NUM_WORKERS = 2
+CFG.TEST.DATA.PIN_MEMORY = False
+
+# ================= evaluate ================= #
+CFG.EVAL = EasyDict()
+CFG.EVAL.HORIZONS = [3, 6, 12]
diff --git a/baselines/STGODE/METR-LA.py b/baselines/STGODE/METR-LA.py
new file mode 100644
index 00000000..1acd09d8
--- /dev/null
+++ b/baselines/STGODE/METR-LA.py
@@ -0,0 +1,143 @@
+import os
+import sys
+import torch
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import STGODE
+from .generate_matrices import generate_dtw_spa_matrix
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'METR-LA'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STGODE
+A_se_wave, A_sp_wave = generate_dtw_spa_matrix(DATA_NAME)
+MODEL_PARAM = {
+    "num_nodes": 207,
+    "num_features": 3,
+    "num_timesteps_input": 12,
+    "num_timesteps_output": 12,
+    "A_sp_hat" : A_sp_wave,
+    "A_se_hat" : A_se_wave
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse,
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.002,
+    "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "StepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "step_size": 50,
+    "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    "max_norm": 5.0
+}
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience (a CFG.TRAIN option). Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/DLinear/PEMS-BAY.py b/baselines/STGODE/PEMS-BAY.py
similarity index 84%
rename from baselines/DLinear/PEMS-BAY.py
rename to baselines/STGODE/PEMS-BAY.py
index 8eb308dc..34cf4a13 100644
--- a/baselines/DLinear/PEMS-BAY.py
+++ b/baselines/STGODE/PEMS-BAY.py
@@ -1,15 +1,17 @@
 import os
 import sys
+import torch
 from easydict import EasyDict
 sys.path.append(os.path.abspath(__file__ + '/../../..'))
 
-from basicts.metrics import masked_mae, masked_mse
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
 from basicts.data import TimeSeriesForecastingDataset
 from basicts.runners import SimpleTimeSeriesForecastingRunner
 from basicts.scaler import ZScoreScaler
 from basicts.utils import get_regular_settings
 
-from .arch import DLinear
+from .arch import STGODE
+from .generate_matrices import generate_dtw_spa_matrix
 
 ############################## Hot Parameters ##############################
 # Dataset & Metrics configuration
@@ -22,12 +24,15 @@
 RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
 NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
 # Model architecture and parameters
-MODEL_ARCH = DLinear
+MODEL_ARCH = STGODE
+A_se_wave, A_sp_wave = generate_dtw_spa_matrix(DATA_NAME)
 MODEL_PARAM = {
-    "seq_len": INPUT_LEN,
-    "pred_len": OUTPUT_LEN,
-    "individual": False,
-    "enc_in": 325
+    "num_nodes": 325,
+    "num_features": 3,  # presumably the three channels selected by FORWARD_FEATURES [0, 1, 2] -- confirm
+    "num_timesteps_input": INPUT_LEN,  # kept in sync with the dataset's input_len instead of a hard-coded 12
+    "num_timesteps_output": OUTPUT_LEN,  # kept in sync with the dataset's output_len instead of a hard-coded 12
+    "A_sp_hat" : A_sp_wave,
+    "A_se_hat" : A_se_wave
 }
 NUM_EPOCHS = 100
 
@@ -69,7 +74,7 @@
 CFG.MODEL.NAME = MODEL_ARCH.__name__
 CFG.MODEL.ARCH = MODEL_ARCH
 CFG.MODEL.PARAM = MODEL_PARAM
-CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
 CFG.MODEL.TARGET_FEATURES = [0]
 
 ############################## Metrics Configuration ##############################
@@ -78,7 +83,8 @@
 # Metrics settings
 CFG.METRICS.FUNCS = EasyDict({
                                 'MAE': masked_mae,
-                                'MSE': masked_mse
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse,
                             })
 CFG.METRICS.TARGET = 'MAE'
 CFG.METRICS.NULL_VAL = NULL_VAL
@@ -101,18 +107,19 @@
 }
 # Learning rate scheduler settings
 CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.TYPE = "StepLR"
 CFG.TRAIN.LR_SCHEDULER.PARAM = {
-    "milestones": [1, 25],
+    "step_size": 50,
     "gamma": 0.5
 }
-CFG.TRAIN.CLIP_GRAD_PARAM = {
-    'max_norm': 5.0
-}
 # Train data loader settings
 CFG.TRAIN.DATA = EasyDict()
-CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.BATCH_SIZE = 16
 CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    "max_norm": 5.0
+}
 
 ############################## Validation Configuration ##############################
 CFG.VAL = EasyDict()
@@ -131,6 +138,6 @@
 CFG.EVAL = EasyDict()
 
 # Evaluation parameters
-CFG.EVAL.HORIZONS = [3, 6, 12]
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
 CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
 CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 #  stopping patience. Default: None. If not specified, the  stopping will not be used.
diff --git a/baselines/STGODE/PEMS04.py b/baselines/STGODE/PEMS04.py
new file mode 100644
index 00000000..e35b438e
--- /dev/null
+++ b/baselines/STGODE/PEMS04.py
@@ -0,0 +1,143 @@
+import os
+import sys
+import torch
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import STGODE
+from .generate_matrices import generate_dtw_spa_matrix
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = STGODE
+A_se_wave, A_sp_wave = generate_dtw_spa_matrix(DATA_NAME)
+MODEL_PARAM = {
+    "num_nodes": 307,
+    "num_features": 3,
+    "num_timesteps_input": 12,
+    "num_timesteps_output": 12,
+    "A_sp_hat" : A_sp_wave,
+    "A_se_hat" : A_se_wave
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse,
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.002,
+    "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "StepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "step_size": 50,
+    "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    "max_norm": 5.0
+}
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/DLinear/PEMS08.py b/baselines/STGODE/PEMS08.py
similarity index 84%
rename from baselines/DLinear/PEMS08.py
rename to baselines/STGODE/PEMS08.py
index e6008f8d..c89efeb9 100644
--- a/baselines/DLinear/PEMS08.py
+++ b/baselines/STGODE/PEMS08.py
@@ -1,15 +1,17 @@
 import os
 import sys
+import torch
 from easydict import EasyDict
 sys.path.append(os.path.abspath(__file__ + '/../../..'))
 
-from basicts.metrics import masked_mae, masked_mse
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
 from basicts.data import TimeSeriesForecastingDataset
 from basicts.runners import SimpleTimeSeriesForecastingRunner
 from basicts.scaler import ZScoreScaler
 from basicts.utils import get_regular_settings
 
-from .arch import DLinear
+from .arch import STGODE
+from .generate_matrices import generate_dtw_spa_matrix
 
 ############################## Hot Parameters ##############################
 # Dataset & Metrics configuration
@@ -22,12 +24,15 @@
 RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
 NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
 # Model architecture and parameters
-MODEL_ARCH = DLinear
+MODEL_ARCH = STGODE
+A_se_wave, A_sp_wave = generate_dtw_spa_matrix(DATA_NAME)
 MODEL_PARAM = {
-    "seq_len": INPUT_LEN,
-    "pred_len": OUTPUT_LEN,
-    "individual": False,
-    "enc_in": 170
+    "num_nodes": 170,
+    "num_features": 3,
+    "num_timesteps_input": 12,
+    "num_timesteps_output": 12,
+    "A_sp_hat" : A_sp_wave,
+    "A_se_hat" : A_se_wave
 }
 NUM_EPOCHS = 100
 
@@ -69,7 +74,7 @@
 CFG.MODEL.NAME = MODEL_ARCH.__name__
 CFG.MODEL.ARCH = MODEL_ARCH
 CFG.MODEL.PARAM = MODEL_PARAM
-CFG.MODEL.FORWARD_FEATURES = [0]
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
 CFG.MODEL.TARGET_FEATURES = [0]
 
 ############################## Metrics Configuration ##############################
@@ -78,7 +83,8 @@
 # Metrics settings
 CFG.METRICS.FUNCS = EasyDict({
                                 'MAE': masked_mae,
-                                'MSE': masked_mse
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse,
                             })
 CFG.METRICS.TARGET = 'MAE'
 CFG.METRICS.NULL_VAL = NULL_VAL
@@ -101,18 +107,19 @@
 }
 # Learning rate scheduler settings
 CFG.TRAIN.LR_SCHEDULER = EasyDict()
-CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.TYPE = "StepLR"
 CFG.TRAIN.LR_SCHEDULER.PARAM = {
-    "milestones": [1, 25],
+    "step_size": 50,
     "gamma": 0.5
 }
-CFG.TRAIN.CLIP_GRAD_PARAM = {
-    'max_norm': 5.0
-}
 # Train data loader settings
 CFG.TRAIN.DATA = EasyDict()
-CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.BATCH_SIZE = 16
 CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    "max_norm": 5.0
+}
 
 ############################## Validation Configuration ##############################
 CFG.VAL = EasyDict()
@@ -131,6 +138,6 @@
 CFG.EVAL = EasyDict()
 
 # Evaluation parameters
-CFG.EVAL.HORIZONS = [3, 6, 12]
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
 CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
 CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 #  stopping patience. Default: None. If not specified, the  stopping will not be used.
diff --git a/baselines/STGODE/arch/__init__.py b/baselines/STGODE/arch/__init__.py
new file mode 100644
index 00000000..c4094b39
--- /dev/null
+++ b/baselines/STGODE/arch/__init__.py
@@ -0,0 +1 @@
+from .stgode import ODEGCN as STGODE
diff --git a/baselines/STGODE/arch/odegcn.py b/baselines/STGODE/arch/odegcn.py
new file mode 100644
index 00000000..c2133814
--- /dev/null
+++ b/baselines/STGODE/arch/odegcn.py
@@ -0,0 +1,73 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+# Whether use adjoint method or not.
+adjoint = False
+if adjoint:
+    from torchdiffeq import odeint_adjoint as odeint
+else:
+    from torchdiffeq import odeint
+
+
+# ODE function: computes dx/dt for the graph ODE that is integrated by ODEblock.
+# Input:
+# --- t: A tensor with shape [], meaning the current time (not used in the computation below).
+# --- x: A tensor with shape [#batches, #nodes, temporal_dim, feature_dim], meaning the value of x at t.
+# Output:
+# --- dx/dt: A tensor with the same shape as x, meaning the derivative of x at t.
+class ODEFunc(nn.Module):
+
+    def __init__(self, feature_dim, temporal_dim, adj):
+        super(ODEFunc, self).__init__()
+        self.adj = adj
+        self.x0 = None  # initial state; must be assigned (see ODEblock.set_x0) before forward is called
+        self.alpha = nn.Parameter(0.8 * torch.ones(adj.shape[1]))  # one learnable weight per node
+        self.beta = 0.6  # not read anywhere in this class
+        self.w = nn.Parameter(torch.eye(feature_dim))
+        self.d = nn.Parameter(torch.zeros(feature_dim) + 1)
+        self.w2 = nn.Parameter(torch.eye(temporal_dim))
+        self.d2 = nn.Parameter(torch.zeros(temporal_dim) + 1)
+
+    def forward(self, t, x):
+        alpha = torch.sigmoid(self.alpha).unsqueeze(-1).unsqueeze(-1).unsqueeze(0)  # shape (1, #nodes, 1, 1)
+        xa = torch.einsum('ij, kjlm->kilm', self.adj.to(x.device), x)  # mix the node axis (dim 1) with adj
+
+        # clamp d to [0, 1]; intended to keep the eigenvalues of w = (W * d) W^T from exceeding 1
+        d = torch.clamp(self.d, min=0, max=1)
+        w = torch.mm(self.w * d, torch.t(self.w))
+        xw = torch.einsum('ijkl, lm->ijkm', x, w)  # transform the feature axis (last dim)
+
+        d2 = torch.clamp(self.d2, min=0, max=1)
+        w2 = torch.mm(self.w2 * d2, torch.t(self.w2))
+        xw2 = torch.einsum('ijkl, km->ijml', x, w2)  # transform the temporal axis (dim 2)
+
+        f = alpha / 2 * xa - x + xw - x + xw2 - x + self.x0
+        return f
+
+
+class ODEblock(nn.Module):  # integrates `odefunc` over the time points in `t` with a fixed-step Euler solver
+    def __init__(self, odefunc, t=torch.tensor([0,1])):
+        super(ODEblock, self).__init__()
+        self.t = t  # time points handed to odeint; forward returns the solution at t[1]
+        self.odefunc = odefunc
+
+    def set_x0(self, x0):
+        self.odefunc.x0 = x0.clone().detach()  # detached copy, so no gradient flows through the stored x0
+
+    def forward(self, x):
+        t = self.t.type_as(x)  # cast the time points to the type of the input
+        z = odeint(self.odefunc, x, t, method='euler')[1]  # index 1 = state at the second time point
+        return z
+
+
+# ODE graph layer: integrates ODEFunc from t=0 to t=`time` starting from the input, then applies ReLU.
+class ODEG(nn.Module):
+    def __init__(self, feature_dim, temporal_dim, adj, time):
+        super(ODEG, self).__init__()
+        self.odeblock = ODEblock(ODEFunc(feature_dim, temporal_dim, adj), t=torch.tensor([0, time]))
+
+    def forward(self, x):
+        self.odeblock.set_x0(x)  # the input is also stored as the x0 term used inside ODEFunc.forward
+        z = self.odeblock(x)
+        return F.relu(z)
diff --git a/baselines/STGODE/arch/stgode.py b/baselines/STGODE/arch/stgode.py
new file mode 100644
index 00000000..ca2ba22f
--- /dev/null
+++ b/baselines/STGODE/arch/stgode.py
@@ -0,0 +1,174 @@
+import torch
+import math
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .odegcn import ODEG
+
+
+class Chomp1d(nn.Module):
+    """
+    Drop the last `chomp_size` steps of the last axis (the extra steps added by padding).
+    """
+    def __init__(self, chomp_size):
+        super(Chomp1d, self).__init__()
+        self.chomp_size = chomp_size
+
+    def forward(self, x):
+        return x[:, :, :, :max(x.shape[-1] - self.chomp_size, 0)].contiguous()  # ":-0" would give an empty tensor
+
+
+class TemporalConvNet(nn.Module):
+    """
+    Stack of dilated causal temporal convolutions with a residual (shortcut) connection.
+    """
+    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
+        """
+        Args:
+            num_inputs : number of feature channels of the input data
+            num_channels : list of channel sizes, one per conv level; the last is the output channel size
+            kernel_size : convolution runs along time only, so the real kernel is (1, kernel_size)
+        """
+        super(TemporalConvNet, self).__init__()
+        layers = []
+        num_levels = len(num_channels)
+        for i in range(num_levels):
+            dilation_size = 2 ** i
+            in_channels = num_inputs if i == 0 else num_channels[i-1]
+            out_channels = num_channels[i]
+            padding = (kernel_size - 1) * dilation_size
+            self.conv = nn.Conv2d(in_channels, out_channels, (1, kernel_size), dilation=(1, dilation_size), padding=(0, padding))
+            self.conv.weight.data.normal_(0, 0.01)
+            self.chomp = Chomp1d(padding)
+            self.relu = nn.ReLU()
+            self.dropout = nn.Dropout(dropout)
+
+            layers += [nn.Sequential(self.conv, self.chomp, self.relu, self.dropout)]
+
+        self.network = nn.Sequential(*layers)
+        self.downsample = nn.Conv2d(num_inputs, num_channels[-1], (1, 1)) if num_inputs != num_channels[-1] else None
+        if self.downsample is not None:
+            self.downsample.weight.data.normal_(0, 0.01)
+
+    def forward(self, x):
+        """
+        Residual block like ResNet: relu(network(x) + shortcut(x)); shortcut is the 1x1 conv or the identity.
+        Args:
+            x : input data of shape (B, N, T, F); the output has shape (B, N, T, num_channels[-1])
+        """
+        # permute shape to (B, F, N, T)
+        y = x.permute(0, 3, 1, 2)
+        # the conditional must be parenthesized, otherwise the conv stack is skipped when there is no downsample
+        y = F.relu(self.network(y) + (self.downsample(y) if self.downsample is not None else y))
+        return y.permute(0, 2, 3, 1)
+
+
+class GCN(nn.Module):  # single graph convolution: relu(A_hat applied on the node axis, then X @ theta)
+    def __init__(self, A_hat, in_channels, out_channels,):
+        super(GCN, self).__init__()
+        self.A_hat = A_hat  # plain attribute (not a buffer), so it does not follow the module across devices
+        self.theta = nn.Parameter(torch.FloatTensor(in_channels, out_channels))
+        self.reset()
+
+    def reset(self):
+        stdv = 1. / math.sqrt(self.theta.shape[1])  # uniform init bound from the output channel count
+        self.theta.data.uniform_(-stdv, stdv)
+
+    def forward(self, X):
+        y = torch.einsum('ij, kjlm-> kilm', self.A_hat.to(X.device), X)  # move A_hat to the input's device first
+        return F.relu(torch.einsum('kjlm, mn->kjln', y, self.theta))
+
+
+class STGCNBlock(nn.Module):
+    def __init__(self, in_channels, out_channels, num_nodes, A_hat):
+        """
+        Args:
+            in_channels: Number of input features at each node in each time step.
+            out_channels: a list of feature channels for each temporal block; the last is the output feature channel
+            num_nodes: Number of nodes in the graph
+            A_hat: the normalized adjacency matrix
+        """
+        super(STGCNBlock, self).__init__()
+        self.A_hat = A_hat
+        self.temporal1 = TemporalConvNet(num_inputs=in_channels,
+                                   num_channels=out_channels)
+        self.odeg = ODEG(out_channels[-1], 12, A_hat, time=6)  # temporal_dim is fixed to 12, so inputs need 12 time steps
+        self.temporal2 = TemporalConvNet(num_inputs=out_channels[-1],
+                                   num_channels=out_channels)
+        self.batch_norm = nn.BatchNorm2d(num_nodes)  # applied to (B, N, T, C), i.e. normalizes per node (dim 1)
+
+    def forward(self, X):
+        """
+        Args:
+            X: Input data of shape (batch_size, num_nodes, num_timesteps, num_features)
+        Return:
+            Output data of shape (batch_size, num_nodes, num_timesteps, out_channels[-1])
+        """
+        t = self.temporal1(X)
+        t = self.odeg(t)
+        t = self.temporal2(F.relu(t))
+
+        return self.batch_norm(t)
+
+
+class ODEGCN(nn.Module):
+    """
+    Paper: Spatial-Temporal Graph ODE Networks for Traffic Flow Forecasting
+    Link: https://arxiv.org/abs/2106.12931
+    Official Codes: https://github.com/square-coder/STGODE
+    """
+    def __init__(self, num_nodes, num_features, num_timesteps_input,
+                 num_timesteps_output, A_sp_hat, A_se_hat):
+        """
+        Args:
+            num_nodes : number of nodes in the graph
+            num_features : number of features at each node in each time step
+            num_timesteps_input : number of past time steps fed into the network
+            num_timesteps_output : desired number of future time steps output by the network
+            A_sp_hat : normalized spatial adjacency matrix
+            A_se_hat : normalized semantic adjacency matrix
+        """
+
+        super(ODEGCN, self).__init__()
+        # spatial graph
+        self.sp_blocks = nn.ModuleList(
+            [nn.Sequential(
+                STGCNBlock(in_channels=num_features, out_channels=[64, 32, 64],
+                num_nodes=num_nodes, A_hat=A_sp_hat),
+                STGCNBlock(in_channels=64, out_channels=[64, 32, 64],
+                num_nodes=num_nodes, A_hat=A_sp_hat)) for _ in range(3)
+            ])
+        # semantic graph
+        self.se_blocks = nn.ModuleList([nn.Sequential(
+                STGCNBlock(in_channels=num_features, out_channels=[64, 32, 64],
+                num_nodes=num_nodes, A_hat=A_se_hat),
+                STGCNBlock(in_channels=64, out_channels=[64, 32, 64],
+                num_nodes=num_nodes, A_hat=A_se_hat)) for _ in range(3)
+            ]) 
+
+        self.pred = nn.Sequential(
+            nn.Linear(num_timesteps_input * 64, num_timesteps_output * 32), 
+            nn.ReLU(),
+            nn.Linear(num_timesteps_output * 32, num_timesteps_output)
+        )
+
+    def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor:
+        """
+        Args:
+            history_data : input of shape (batch_size, num_timesteps, num_nodes, num_features); transposed to (B, N, T, F) below
+        Returns:
+            prediction for future of shape (batch_size, num_timesteps_output, num_nodes, 1)
+        """
+        x = history_data.transpose(1, 2)
+        outs = []
+        # spatial graph
+        for blk in self.sp_blocks:
+            outs.append(blk(x))
+        # semantic graph
+        for blk in self.se_blocks:
+            outs.append(blk(x))
+        outs = torch.stack(outs)
+        x = torch.max(outs, dim=0)[0]  # element-wise max over the six branch outputs
+        x = x.reshape((x.shape[0], x.shape[1], -1))  # flatten time and channels per node
+        x = self.pred(x).transpose(1, 2).unsqueeze(-1)    # B, T_out, N, 1
+        return x
diff --git a/baselines/STGODE/generate_matrices.py b/baselines/STGODE/generate_matrices.py
new file mode 100644
index 00000000..d4876cb4
--- /dev/null
+++ b/baselines/STGODE/generate_matrices.py
@@ -0,0 +1,116 @@
+# STGODE has a different way of generating the matrices, so we need to use this script to generate the matrices for STGODE
+import os
+import sys
+# TODO: remove it when basicts can be installed by pip
+sys.path.append(os.path.abspath(__file__ + "/../../.."))
+import csv
+import pickle
+import argparse
+
+import torch
+import numpy as np
+from tqdm import tqdm
+from fastdtw import fastdtw
+
+from basicts.utils.serialization import load_pkl, load_dataset_data
+
+
+def get_normalized_adj(A):
+    """
+    Build alpha / 2 * (I + D^-1/2 A D^-1/2) from adjacency A and return it as a float32 tensor.
+    """
+    alpha = 0.8
+    degree = np.array(np.sum(A, axis=1)).reshape((-1,))
+    degree[degree <= 10e-5] = 10e-5    # floor tiny degrees so the reciprocal stays finite
+    inv_sqrt_degree = np.reciprocal(np.sqrt(degree))
+    row_scaled = np.multiply(inv_sqrt_degree.reshape((-1, 1)), A)
+    normalized = np.multiply(row_scaled, inv_sqrt_degree.reshape((1, -1)))
+    regularized = alpha / 2 * (np.eye(A.shape[0]) + normalized)
+    return torch.from_numpy(regularized.astype(np.float32))
+
+
+def generate_dtw_spa_matrix(dataset_name, sigma1=0.1, thres1=0.6, sigma2=10, thres2=0.5):
+    """read data, generate spatial adjacency matrix and semantic adjacency matrix by dtw
+
+    Args:
+        dataset_name: str, name of the dataset to load
+        sigma1, thres1: float, default=0.1 and 0.6, sigma and threshold for the semantic matrix
+        sigma2, thres2: float, default=10 and 0.5, sigma and threshold for the spatial matrix
+        (the DTW and spatial distance matrices are cached as .npy files next to this script)
+
+    Returns:
+        dtw_matrix: tensor, normalized semantic adjacency matrix
+        sp_matrix: tensor, normalized spatial adjacency matrix
+        (returned together as the tuple (dtw_matrix, sp_matrix))
+    """
+
+    # the original STGODE uses the full time series to generate the matrices, which is not reasonable since the test set is not available in the real world (NOTE(review): confirm load_dataset_data excludes the test split)
+    data = load_dataset_data(dataset_name=dataset_name)
+    num_node = data.shape[1]
+    if not os.path.exists('{0}/{1}_dtw_distance.npy'.format(os.path.abspath(__file__ + "/.."), dataset_name)):
+        print("generate dtw distance matrix")
+        data_mean = np.mean([data[:, :, 0][24*12*i: 24*12*(i+1)] for i in range(data.shape[0]//(24*12))], axis=0)
+        data_mean = data_mean.squeeze().T
+        dtw_distance = np.zeros((num_node, num_node))
+        for i in tqdm(range(num_node)):
+            for j in range(i, num_node):
+                dtw_distance[i][j] = fastdtw(data_mean[i], data_mean[j], radius=6)[0]
+        for i in range(num_node):
+            for j in range(i):
+                dtw_distance[i][j] = dtw_distance[j][i]
+        np.save('{0}/{1}_dtw_distance.npy'.format(os.path.abspath(__file__ + "/.."), dataset_name), dtw_distance)
+
+    dist_matrix = np.load('{0}/{1}_dtw_distance.npy'.format(os.path.abspath(__file__ + "/.."), dataset_name))
+
+    mean = np.mean(dist_matrix)
+    std = np.std(dist_matrix)
+    dist_matrix = (dist_matrix - mean) / std
+    sigma = sigma1
+    dist_matrix = np.exp(-dist_matrix ** 2 / sigma ** 2)
+    dtw_matrix = np.zeros_like(dist_matrix)
+    dtw_matrix[dist_matrix > thres1] = 1
+
+    # STGODE provides the scripts to generate spatial matrix for PEMS03, PEMS04, PEMS07, PEMS08
+    # For other datasets, we use the original spatial matrix.
+    if dataset_name in ["PEMS03", "PEMS04", "PEMS07", "PEMS08"]:
+        print("STGODE generate spatial matrix based on the raw data. Please ensure the raw data is placed in the correct path `datasets/raw_data/$DATASET_NAME/$DATASET_NAME.csv.")
+        if not os.path.exists('{0}/{1}_spatial_distance.npy'.format(os.path.abspath(__file__ + "/.."), dataset_name)):
+            graph_csv_file_path = "./datasets/raw_data/{0}/{0}.csv".format(dataset_name)
+            with open(graph_csv_file_path, 'r') as fp:
+                dist_matrix = np.zeros((num_node, num_node)) + np.inf  # np.float was removed in NumPy 1.24
+                file = csv.reader(fp)
+                # skip the header row
+                next(file, None)
+                for line in file:
+                    start = int(line[0])
+                    end = int(line[1])
+                    dist_matrix[start][end] = float(line[2])
+                    dist_matrix[end][start] = float(line[2])
+                np.save('{0}/{1}_spatial_distance.npy'.format(os.path.abspath(__file__ + "/.."), dataset_name), dist_matrix)
+
+        dist_matrix = np.load('{0}/{1}_spatial_distance.npy'.format(os.path.abspath(__file__ + "/.."), dataset_name))
+        # normalization (statistics are taken over the finite, i.e. connected, entries only)
+        std = np.std(dist_matrix[dist_matrix != np.inf])
+        mean = np.mean(dist_matrix[dist_matrix != np.inf])
+        dist_matrix = (dist_matrix - mean) / std
+        sigma = sigma2
+        sp_matrix = np.exp(- dist_matrix**2 / sigma**2)
+        sp_matrix[sp_matrix < thres2] = 0
+    else:
+        spatial_distance_file = "./datasets/{0}/adj_mx.pkl".format(dataset_name)
+        sp_matrix = load_pkl(spatial_distance_file)[-1]
+
+    print(f'average degree of spatial graph is {np.sum(sp_matrix > 0)/2/num_node}')
+    print(f'average degree of semantic graph is {np.sum(dtw_matrix > 0)/2/num_node}')
+    # normalize
+    dtw_matrix = get_normalized_adj(dtw_matrix)
+    sp_matrix = get_normalized_adj(sp_matrix)
+    return dtw_matrix, sp_matrix
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+
+    # generate_dtw_spa_matrix("PEMS04")
+    # generate_dtw_spa_matrix("PEMS08")
+    generate_dtw_spa_matrix("PEMS-BAY")
+    generate_dtw_spa_matrix("METR-LA")
diff --git a/baselines/STID/run.sh b/baselines/STID/run.sh
deleted file mode 100644
index 75941b9e..00000000
--- a/baselines/STID/run.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/STID/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/STID/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/STID/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/STID/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/STID/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/STID/PEMS08.py --gpus '0'
-
diff --git a/baselines/STNorm/run.sh b/baselines/STNorm/run.sh
deleted file mode 100644
index 2e33cf70..00000000
--- a/baselines/STNorm/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/STNorm/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/STNorm/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/STNorm/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/STNorm/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/STNorm/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/STNorm/PEMS08.py --gpus '0'
diff --git a/baselines/STWave/run.sh b/baselines/STWave/run.sh
deleted file mode 100644
index 61fd7ca0..00000000
--- a/baselines/STWave/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/STWave/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/STWave/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/STWave/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/STWave/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/STWave/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/STWave/PEMS08.py --gpus '0'
diff --git a/baselines/StemGNN/run.sh b/baselines/StemGNN/run.sh
deleted file mode 100644
index 25cb8053..00000000
--- a/baselines/StemGNN/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/StemGNN/METR-LA.py --gpus '0'
-python experiments/train.py -c baselines/StemGNN/PEMS-BAY.py --gpus '0'
-python experiments/train.py -c baselines/StemGNN/PEMS03.py --gpus '0'
-python experiments/train.py -c baselines/StemGNN/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/StemGNN/PEMS07.py --gpus '0'
-python experiments/train.py -c baselines/StemGNN/PEMS08.py --gpus '0'
diff --git a/baselines/TimesNet/run.sh b/baselines/TimesNet/run.sh
deleted file mode 100644
index 9df02f42..00000000
--- a/baselines/TimesNet/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-python experiments/train.py -c baselines/TimesNet/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/Weather.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/TimesNet/PEMS08.py --gpus '0'
diff --git a/baselines/Triformer/PEMS04_LTSF.py b/baselines/Triformer/PEMS04_LTSF.py
new file mode 100644
index 00000000..318ce170
--- /dev/null
+++ b/baselines/Triformer/PEMS04_LTSF.py
@@ -0,0 +1,133 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_rmse, masked_mape
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Triformer
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS04'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+# OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output 
+INPUT_LEN = 336 # LTSF
+OUTPUT_LEN = 336 # LTSF
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Triformer
+NUM_NODES = 307
+MODEL_PARAM = {
+    "num_nodes": NUM_NODES,
+    "lag": INPUT_LEN,
+    "horizon": OUTPUT_LEN,
+    "input_dim": 3,
+    # default parameters described in the paper
+    "channels": 32,
+    "patch_sizes": [7, 4, 3, 2, 2],
+    "mem_dim": 5
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.0001
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/Triformer/PEMS08_LTSF.py b/baselines/Triformer/PEMS08_LTSF.py
new file mode 100644
index 00000000..fcaf251a
--- /dev/null
+++ b/baselines/Triformer/PEMS08_LTSF.py
@@ -0,0 +1,133 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_rmse, masked_mape
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import Triformer
+
+############################## Hot Parameters ##############################
+# Triformer on PEMS08, long-horizon (LTSF) setting: dataset & metrics configuration
+DATA_NAME = 'PEMS08'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+# The dataset's regular lengths (regular_settings['INPUT_LEN'] / ['OUTPUT_LEN'])
+# are deliberately not used here; the LTSF lengths below replace them.
+INPUT_LEN = 336 # LTSF: length of input sequence
+OUTPUT_LEN = 336 # LTSF: length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = Triformer
+NUM_NODES = 170
+MODEL_PARAM = {
+    "num_nodes": NUM_NODES,
+    "lag": INPUT_LEN,
+    "horizon": OUTPUT_LEN,
+    "input_dim": 3,
+    # default parameters described in the paper
+    "channels": 32,
+    "patch_sizes": [7, 4, 3, 2, 2],  # product is 336 == INPUT_LEN; presumably required by the model's patching -- TODO confirm
+    "mem_dim": 5
+    }
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]  # three feature indices, matching "input_dim": 3 above
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MAPE': masked_mape,
+                                'RMSE': masked_rmse
+                            })
+CFG.METRICS.TARGET = 'MAE'  # must be one of the keys of CFG.METRICS.FUNCS
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.0001
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [12, 24, 48, 96, 192, 288, 336] # Prediction horizons for evaluation; the largest equals OUTPUT_LEN
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/Triformer/run.sh b/baselines/Triformer/run.sh
deleted file mode 100644
index cda838f5..00000000
--- a/baselines/Triformer/run.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-# # !/bin/bash
-python experiments/train.py -c baselines/Triformer/ETTh1.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/ETTh2.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/ETTm1.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/ETTm2.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/Electricity.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/ExchangeRate.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/Weather.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/PEMS04.py --gpus '0'
-python experiments/train.py -c baselines/Triformer/PEMS08.py --gpus '0'
diff --git a/baselines/WaveNet/METR-LA.py b/baselines/WaveNet/METR-LA.py
new file mode 100644
index 00000000..071cf50b
--- /dev/null
+++ b/baselines/WaveNet/METR-LA.py
@@ -0,0 +1,143 @@
+import os
+import sys
+import torch
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
+
+from .arch import WaveNet
+
+############################## Hot Parameters ##############################
+# WaveNet on METR-LA with the dataset's regular settings: dataset & metrics configuration
+DATA_NAME = 'METR-LA'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
+OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = WaveNet
+MODEL_PARAM = {
+    "in_dim": 1,
+    "out_dim": OUTPUT_LEN,  # one output channel per forecast step
+    "residual_channels": 16,
+    "dilation_channels": 16,
+    "skip_channels": 64,
+    "end_channels": 128,
+    "kernel_size": 12,
+    "blocks": 6,
+    "layers": 3  # with these values the arch's receptive field is 1 + 6 * 11 * (1 + 2 + 4) = 463 steps
+}
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0]  # a single feature index, matching "in_dim": 1 above
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings. NOTE(review): the METR-LA data script lists MAE/RMSE/MAPE as its regular metrics; only MAE/MSE are reported here -- confirm this is intended.
+CFG.METRICS.FUNCS = EasyDict({
+                                'MAE': masked_mae,
+                                'MSE': masked_mse
+                            })
+CFG.METRICS.TARGET = 'MAE'  # must be one of the keys of CFG.METRICS.FUNCS
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "Adam"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.002,
+    "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 50],  # the learning rate is multiplied by gamma at each milestone
+    "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 16
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    "max_norm": 5.0
+}
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
+CFG.TRAIN.EARLY_STOPPING_PATIENCE = 10 # Early stopping patience. Default: None. If not specified, early stopping will not be used.
diff --git a/baselines/WaveNet/arch.py b/baselines/WaveNet/arch.py
new file mode 100644
index 00000000..39a41022
--- /dev/null
+++ b/baselines/WaveNet/arch.py
@@ -0,0 +1,147 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+
+class linear(nn.Module):
+    """Point-wise linear projection implemented as a 1x1 ``Conv2d``."""
+
+    def __init__(self, c_in, c_out):
+        super().__init__()
+        # A (1, 1) kernel with default stride/padding mixes channels only.
+        self.mlp = nn.Conv2d(c_in, c_out, kernel_size=1, bias=True)
+
+    def forward(self, x):
+        return self.mlp(x)
+
+class WaveNet(nn.Module):
+    """
+    Paper: Wavenet: A generative model for raw audio.
+    Link: https://arxiv.org/abs/1609.03499
+    Modified from:
+        https://github.com/nnzhan/Graph-WaveNet/blob/master/model.py
+        https://github.com/JLDeng/ST-Norm/blob/master/models/Wavenet.py
+    """
+
+    def __init__(self, in_dim=2, out_dim=12, residual_channels=32,
+                    dilation_channels=32, skip_channels=256, end_channels=512,
+                    kernel_size=2, blocks=4, layers=2):
+        """Build ``blocks * layers`` gated dilated temporal convolution layers.
+
+        Within a block the dilation doubles per layer (1, 2, 4, ...) and resets
+        to 1 at the start of the next block.
+        """
+        super().__init__()
+        self.blocks = blocks
+        self.layers = layers
+
+        self.filter_convs = nn.ModuleList()
+        self.gate_convs = nn.ModuleList()
+        self.residual_convs = nn.ModuleList()
+        self.skip_convs = nn.ModuleList()
+        self.bn = nn.ModuleList()
+
+        self.start_conv = nn.Conv2d(in_channels=in_dim,
+                                    out_channels=residual_channels,
+                                    kernel_size=(1, 1))
+
+        receptive_field = 1
+
+        for _ in range(blocks):
+            additional_scope = kernel_size - 1
+            new_dilation = 1
+            for _ in range(layers):
+                # Every layer is a Conv2d because forward() feeds 4D tensors; the
+                # (1, k) kernels convolve along the last (time) axis only.
+                self.filter_convs.append(nn.Conv2d(in_channels=residual_channels,
+                                                   out_channels=dilation_channels,
+                                                   kernel_size=(1, kernel_size), dilation=new_dilation))
+                self.gate_convs.append(nn.Conv2d(in_channels=residual_channels,
+                                                 out_channels=dilation_channels,
+                                                 kernel_size=(1, kernel_size), dilation=new_dilation))
+
+                # 1x1 convolution for residual connection
+                self.residual_convs.append(nn.Conv2d(in_channels=dilation_channels,
+                                                     out_channels=residual_channels,
+                                                     kernel_size=(1, 1)))
+
+                # 1x1 convolution for skip connection
+                self.skip_convs.append(nn.Conv2d(in_channels=dilation_channels,
+                                                 out_channels=skip_channels,
+                                                 kernel_size=(1, 1)))
+                self.bn.append(nn.BatchNorm2d(residual_channels))
+                new_dilation *= 2
+                receptive_field += additional_scope
+                additional_scope *= 2
+
+        self.end_conv_1 = nn.Conv2d(in_channels=skip_channels,
+                                    out_channels=end_channels,
+                                    kernel_size=(1, 1),
+                                    bias=True)
+
+        self.end_conv_2 = nn.Conv2d(in_channels=end_channels,
+                                    out_channels=out_dim,
+                                    kernel_size=(1, 1),
+                                    bias=True)
+
+        # Number of input time steps that collapse to a single output step;
+        # forward() left-pads shorter histories up to this length.
+        self.receptive_field = receptive_field
+
+    def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor:
+        """Feedforward function of WaveNet.
+
+        Args:
+            history_data (torch.Tensor): shape [B, L, N, C]
+            future_data, batch_seen, epoch, train, **kwargs: accepted but unused here.
+
+        Returns:
+            torch.Tensor: [B, out_dim, N, T_out], where
+                T_out = max(L, receptive_field) - receptive_field + 1 (1 when L <= receptive_field).
+        """
+
+        inputs = history_data.transpose(1, 3).contiguous()  # [B, C, N, L]
+        in_len = inputs.size(3)
+        if in_len < self.receptive_field:
+            # left-pad the time axis so the dilated stack has enough steps to consume
+            x = F.pad(inputs, (self.receptive_field - in_len, 0, 0, 0))
+        else:
+            x = inputs
+        x = self.start_conv(x)
+        skip = None  # running sum of the skip-connection outputs
+
+        # WaveNet layers
+        for i in range(self.blocks * self.layers):
+            #            |----------------------------------------|     *residual*
+            #            |                                        |
+            #            |    |-- conv -- tanh --|                |
+            # -> dilate -|----|                  * ----|-- 1x1 -- + -->	*input*
+            #                 |-- conv -- sigm --|     |
+            #                                         1x1
+            #                                          |
+            # ---------------------------------------> + ------------->	*skip*
+
+            residual = x
+            # dilated convolution
+            filter_out = torch.tanh(self.filter_convs[i](residual))
+            gate_out = torch.sigmoid(self.gate_convs[i](residual))
+            x = filter_out * gate_out
+
+            # parametrized skip connection
+            s = self.skip_convs[i](x)
+            if skip is None:
+                skip = s
+            else:
+                # earlier layers kept more time steps; align on the most recent ones
+                skip = s + skip[:, :, :, -s.size(3):]
+
+            x = self.residual_convs[i](x)
+            # each dilated conv shortens the time axis, so crop the residual to match
+            x = x + residual[:, :, :, -x.size(3):]
+
+            x = self.bn[i](x)
+
+        x = F.relu(skip)
+        x = F.relu(self.end_conv_1(x))
+        x = self.end_conv_2(x)
+        return x
diff --git a/basicts/metrics/__init__.py b/basicts/metrics/__init__.py
index 71f0ac2d..eaa7da28 100644
--- a/basicts/metrics/__init__.py
+++ b/basicts/metrics/__init__.py
@@ -4,10 +4,19 @@
 from .mape import masked_mape
 from .wape import masked_wape
 
+ALL_METRICS = {
+            'MAE': masked_mae,
+            'MSE': masked_mse,
+            'RMSE': masked_rmse,
+            'MAPE': masked_mape,
+            'WAPE': masked_wape
+            }
+
 __all__ = [
     'masked_mae',
     'masked_mse',
     'masked_rmse',
     'masked_mape',
     'masked_wape',
+    'ALL_METRICS'
 ]
diff --git a/basicts/runners/base_runner.py b/basicts/runners/base_runner.py
index bbe34cd9..c478e4b7 100644
--- a/basicts/runners/base_runner.py
+++ b/basicts/runners/base_runner.py
@@ -46,7 +46,7 @@ def __init__(self, cfg: Dict) -> None:
             self.to_running_device = to_device
 
         # set process title
-        proctitle_name = f"{cfg['MODEL'].get('NAME', ' ')}({cfg.get('DATASET', {}).get('NAME', 'Unknown Dataset')})"
+        proctitle_name = f"{cfg['MODEL'].get('NAME')}({cfg.get('DATASET', {}).get('NAME', 'Unknown Dataset')})"
         setproctitle.setproctitle(f'{proctitle_name}@BasicTS')
 
     @staticmethod
diff --git a/scripts/data_preparation/BeijingAirQuality/generate_training_data.py b/scripts/data_preparation/BeijingAirQuality/generate_training_data.py
index db15ae64..016cc964 100644
--- a/scripts/data_preparation/BeijingAirQuality/generate_training_data.py
+++ b/scripts/data_preparation/BeijingAirQuality/generate_training_data.py
@@ -17,12 +17,13 @@
 domain = 'Beijing air quality'
 feature_description = [domain, 'time of day', 'day of week']
 regular_settings = {
-    "INPUT_LEN": 336,
-    "OUTPUT_LEN": 336,
-    "TRAIN_VAL_TEST_RATIO": [0.6, 0.2, 0.2],
-    "NORM_EACH_CHANNEL": True,
-    "RESCALE": False,
-    "NULL_VAL": np.nan
+    'INPUT_LEN': 336,
+    'OUTPUT_LEN': 336,
+    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+    'NORM_EACH_CHANNEL': True,
+    'RESCALE': False,
+    'METRICS': ['MAE', 'MSE'],
+    'NULL_VAL': np.nan
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/CA/generate_training_data.py b/scripts/data_preparation/CA/generate_training_data.py
index 057c2aa6..d621c9fb 100644
--- a/scripts/data_preparation/CA/generate_training_data.py
+++ b/scripts/data_preparation/CA/generate_training_data.py
@@ -22,12 +22,13 @@
 domain = 'traffic flow'
 feature_description = [domain, 'time of day', 'day of week']
 regular_settings = {
-    "INPUT_LEN": 12,
-    "OUTPUT_LEN": 12,
-    "TRAIN_VAL_TEST_RATIO": [0.6, 0.2, 0.2],
-    "NORM_EACH_CHANNEL": False,
-    "RESCALE": True,
-    "NULL_VAL": 0.0
+    'INPUT_LEN': 12,
+    'OUTPUT_LEN': 12,
+    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+    'NORM_EACH_CHANNEL': False,
+    'RESCALE': True,
+    'METRICS': ['MAE', 'RMSE', 'MAPE'],
+    'NULL_VAL': 0.0
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/ETTh1/generate_training_data.py b/scripts/data_preparation/ETTh1/generate_training_data.py
index 5321877b..8991825a 100644
--- a/scripts/data_preparation/ETTh1/generate_training_data.py
+++ b/scripts/data_preparation/ETTh1/generate_training_data.py
@@ -19,12 +19,13 @@
 domain = 'electricity transformer temperature'
 feature_description = [domain, 'time of day', 'day of week', 'day of week', 'day of year']
 regular_settings = {
-    "INPUT_LEN": 336,
-    "OUTPUT_LEN": 336,
-    "TRAIN_VAL_TEST_RATIO": [0.6, 0.2, 0.2],
-    "NORM_EACH_CHANNEL": True,
-    "RESCALE": False,
-    "NULL_VAL": np.nan
+    'INPUT_LEN': 336,
+    'OUTPUT_LEN': 336,
+    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+    'NORM_EACH_CHANNEL': True,
+    'RESCALE': False,
+    'METRICS': ['MAE', 'MSE'],
+    'NULL_VAL': np.nan
 }
 
 def load_and_preprocess_data():
@@ -95,7 +96,7 @@ def save_description(data):
         'feature_description': feature_description,
         'has_graph': graph_file_path is not None,
         'frequency (minutes)': frequency,
-        'regular_settings': regular_settings
+        'regular_settings': regular_settings,
     }
     description_path = os.path.join(output_dir, 'desc.json')
     with open(description_path, 'w') as f:
diff --git a/scripts/data_preparation/ETTh2/generate_training_data.py b/scripts/data_preparation/ETTh2/generate_training_data.py
index 5790a656..0034997a 100644
--- a/scripts/data_preparation/ETTh2/generate_training_data.py
+++ b/scripts/data_preparation/ETTh2/generate_training_data.py
@@ -19,12 +19,13 @@
 domain = 'electricity transformer temperature'
 feature_description = [domain, 'time of day', 'day of week', 'day of week', 'day of year']
 regular_settings = {
-    "INPUT_LEN": 336,
-    "OUTPUT_LEN": 336,
-    "TRAIN_VAL_TEST_RATIO": [0.6, 0.2, 0.2],
-    "NORM_EACH_CHANNEL": True,
-    "RESCALE": False,
-    "NULL_VAL": np.nan
+    'INPUT_LEN': 336,
+    'OUTPUT_LEN': 336,
+    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+    'NORM_EACH_CHANNEL': True,
+    'RESCALE': False,
+    'METRICS': ['MAE', 'MSE'],
+    'NULL_VAL': np.nan
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/ETTm1/generate_training_data.py b/scripts/data_preparation/ETTm1/generate_training_data.py
index 8791874b..73c6d11b 100644
--- a/scripts/data_preparation/ETTm1/generate_training_data.py
+++ b/scripts/data_preparation/ETTm1/generate_training_data.py
@@ -19,12 +19,13 @@
 domain = 'electricity transformer temperature'
 feature_description = [domain, 'time of day', 'day of week', 'day of week', 'day of year']
 regular_settings = {
-    "INPUT_LEN": 336,
-    "OUTPUT_LEN": 336,
-    "TRAIN_VAL_TEST_RATIO": [0.6, 0.2, 0.2],
-    "NORM_EACH_CHANNEL": True,
-    "RESCALE": False,
-    "NULL_VAL": np.nan
+    'INPUT_LEN': 336,
+    'OUTPUT_LEN': 336,
+    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+    'NORM_EACH_CHANNEL': True,
+    'RESCALE': False,
+    'METRICS': ['MAE', 'MSE'],
+    'NULL_VAL': np.nan
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/ETTm2/generate_training_data.py b/scripts/data_preparation/ETTm2/generate_training_data.py
index 8698bbeb..e78905a2 100644
--- a/scripts/data_preparation/ETTm2/generate_training_data.py
+++ b/scripts/data_preparation/ETTm2/generate_training_data.py
@@ -19,12 +19,13 @@
 domain = 'electricity transformer temperature'
 feature_description = [domain, 'time of day', 'day of week', 'day of week', 'day of year']
 regular_settings = {
-    "INPUT_LEN": 336,
-    "OUTPUT_LEN": 336,
-    "TRAIN_VAL_TEST_RATIO": [0.6, 0.2, 0.2],
-    "NORM_EACH_CHANNEL": True,
-    "RESCALE": False,
-    "NULL_VAL": np.nan
+    'INPUT_LEN': 336,
+    'OUTPUT_LEN': 336,
+    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+    'NORM_EACH_CHANNEL': True,
+    'RESCALE': False,
+    'METRICS': ['MAE', 'MSE'],
+    'NULL_VAL': np.nan
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/Electricity/generate_training_data.py b/scripts/data_preparation/Electricity/generate_training_data.py
index 85ef6a80..464498f5 100644
--- a/scripts/data_preparation/Electricity/generate_training_data.py
+++ b/scripts/data_preparation/Electricity/generate_training_data.py
@@ -19,12 +19,13 @@
 domain = 'electricity consumption'
 feature_description = [domain, 'time of day', 'day of week', 'day of week', 'day of year']
 regular_settings = {
-    "INPUT_LEN": 336,
-    "OUTPUT_LEN": 336,
-    "TRAIN_VAL_TEST_RATIO": [0.7, 0.1, 0.2],
-    "NORM_EACH_CHANNEL": True,
-    "RESCALE": False,
-    "NULL_VAL": np.nan
+    'INPUT_LEN': 336,
+    'OUTPUT_LEN': 336,
+    'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+    'NORM_EACH_CHANNEL': True,
+    'RESCALE': False,
+    'METRICS': ['MAE', 'MSE'],
+    'NULL_VAL': np.nan
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/ExchangeRate/generate_training_data.py b/scripts/data_preparation/ExchangeRate/generate_training_data.py
index b5e0f4f5..2cf9ccf5 100644
--- a/scripts/data_preparation/ExchangeRate/generate_training_data.py
+++ b/scripts/data_preparation/ExchangeRate/generate_training_data.py
@@ -19,12 +19,13 @@
 domain = 'exchange rate'
 feature_description = [domain, 'time of day', 'day of week', 'day of week', 'day of year']
 regular_settings = {
-    "INPUT_LEN": 336,
-    "OUTPUT_LEN": 336,
-    "TRAIN_VAL_TEST_RATIO": [0.7, 0.1, 0.2],
-    "NORM_EACH_CHANNEL": True,
-    "RESCALE": False,
-    "NULL_VAL": np.nan
+    'INPUT_LEN': 336,
+    'OUTPUT_LEN': 336,
+    'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+    'NORM_EACH_CHANNEL': True,
+    'RESCALE': False,
+    'METRICS': ['MAE', 'MSE'],
+    'NULL_VAL': np.nan
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/GBA/generate_training_data.py b/scripts/data_preparation/GBA/generate_training_data.py
index a1bd037f..0b8e83a7 100644
--- a/scripts/data_preparation/GBA/generate_training_data.py
+++ b/scripts/data_preparation/GBA/generate_training_data.py
@@ -22,12 +22,13 @@
 domain = 'traffic flow'
 feature_description = [domain, 'time of day', 'day of week']
 regular_settings = {
-    "INPUT_LEN": 12,
-    "OUTPUT_LEN": 12,
-    "TRAIN_VAL_TEST_RATIO": [0.6, 0.2, 0.2],
-    "NORM_EACH_CHANNEL": False,
-    "RESCALE": True,
-    "NULL_VAL": 0.0
+    'INPUT_LEN': 12,
+    'OUTPUT_LEN': 12,
+    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+    'NORM_EACH_CHANNEL': False,
+    'RESCALE': True,
+    'METRICS': ['MAE', 'RMSE', 'MAPE'],
+    'NULL_VAL': 0.0
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/GLA/generate_training_data.py b/scripts/data_preparation/GLA/generate_training_data.py
index c7733a0d..3cdb21d1 100644
--- a/scripts/data_preparation/GLA/generate_training_data.py
+++ b/scripts/data_preparation/GLA/generate_training_data.py
@@ -22,12 +22,13 @@
 domain = 'traffic flow'
 feature_description = [domain, 'time of day', 'day of week']
 regular_settings = {
-    "INPUT_LEN": 12,
-    "OUTPUT_LEN": 12,
-    "TRAIN_VAL_TEST_RATIO": [0.6, 0.2, 0.2],
-    "NORM_EACH_CHANNEL": False,
-    "RESCALE": True,
-    "NULL_VAL": 0.0
+    'INPUT_LEN': 12,
+    'OUTPUT_LEN': 12,
+    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+    'NORM_EACH_CHANNEL': False,
+    'RESCALE': True,
+    'METRICS': ['MAE', 'RMSE', 'MAPE'],
+    'NULL_VAL': 0.0
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/Gaussian/generate_training_data.py b/scripts/data_preparation/Gaussian/generate_training_data.py
index 3e6c8263..946cb6a5 100644
--- a/scripts/data_preparation/Gaussian/generate_training_data.py
+++ b/scripts/data_preparation/Gaussian/generate_training_data.py
@@ -14,12 +14,12 @@
 domain = 'simulated Gaussian data'
 feature_description = [domain]
 regular_settings = {
-    "INPUT_LEN": 336,
-    "OUTPUT_LEN": 336,
-    "TRAIN_VAL_TEST_RATIO": [0.7, 0.1, 0.2],
-    "NORM_EACH_CHANNEL": False,
-    "RESCALE": True,
-    "NULL_VAL": np.nan
+    'INPUT_LEN': 336,
+    'OUTPUT_LEN': 336,
+    'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+    'NORM_EACH_CHANNEL': False,
+    'RESCALE': True,
+    'NULL_VAL': np.nan
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/Illness/generate_training_data.py b/scripts/data_preparation/Illness/generate_training_data.py
index b1b708e7..47feb121 100644
--- a/scripts/data_preparation/Illness/generate_training_data.py
+++ b/scripts/data_preparation/Illness/generate_training_data.py
@@ -19,12 +19,13 @@
 domain = 'ilness data'
 feature_description = [domain, 'time of day', 'day of week', 'day of week', 'day of year']
 regular_settings = {
-    "INPUT_LEN": 96,
-    "OUTPUT_LEN": 48,
-    "TRAIN_VAL_TEST_RATIO": [0.7, 0.1, 0.2],
-    "NORM_EACH_CHANNEL": True,
-    "RESCALE": False,
-    "NULL_VAL": np.nan
+    'INPUT_LEN': 96,
+    'OUTPUT_LEN': 48,
+    'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+    'NORM_EACH_CHANNEL': True,
+    'RESCALE': False,
+    'METRICS': ['MAE', 'MSE'],
+    'NULL_VAL': np.nan
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/METR-LA/generate_training_data.py b/scripts/data_preparation/METR-LA/generate_training_data.py
index 72310c6f..b5d13e0e 100644
--- a/scripts/data_preparation/METR-LA/generate_training_data.py
+++ b/scripts/data_preparation/METR-LA/generate_training_data.py
@@ -20,12 +20,13 @@
 domain = 'traffic speed'
 feature_description = [domain, 'time of day', 'day of week']
 regular_settings = {
-    "INPUT_LEN": 12,
-    "OUTPUT_LEN": 12,
-    "TRAIN_VAL_TEST_RATIO": [0.7, 0.1, 0.2],
-    "NORM_EACH_CHANNEL": False,
-    "RESCALE": True,
-    "NULL_VAL": 0.0
+    'INPUT_LEN': 12,
+    'OUTPUT_LEN': 12,
+    'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+    'NORM_EACH_CHANNEL': False,
+    'RESCALE': True,
+    'METRICS': ['MAE', 'RMSE', 'MAPE'],
+    'NULL_VAL': 0.0
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/PEMS-BAY/generate_training_data.py b/scripts/data_preparation/PEMS-BAY/generate_training_data.py
index fbfce7df..83670af4 100644
--- a/scripts/data_preparation/PEMS-BAY/generate_training_data.py
+++ b/scripts/data_preparation/PEMS-BAY/generate_training_data.py
@@ -20,12 +20,13 @@
 domain = 'traffic speed'
 feature_description = [domain, 'time of day', 'day of week']
 regular_settings = {
-    "INPUT_LEN": 12,
-    "OUTPUT_LEN": 12,
-    "TRAIN_VAL_TEST_RATIO": [0.7, 0.1, 0.2],
-    "NORM_EACH_CHANNEL": False,
-    "RESCALE": True,
-    "NULL_VAL": 0.0
+    'INPUT_LEN': 12,
+    'OUTPUT_LEN': 12,
+    'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+    'NORM_EACH_CHANNEL': False,
+    'RESCALE': True,
+    'METRICS': ['MAE', 'RMSE', 'MAPE'],
+    'NULL_VAL': 0.0
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/PEMS03/generate_training_data.py b/scripts/data_preparation/PEMS03/generate_training_data.py
index e1c8a989..bf212a5f 100644
--- a/scripts/data_preparation/PEMS03/generate_training_data.py
+++ b/scripts/data_preparation/PEMS03/generate_training_data.py
@@ -19,12 +19,13 @@
 domain = 'traffic flow'
 feature_description = [domain, 'time of day', 'day of week']
 regular_settings = {
-    "INPUT_LEN": 12,
-    "OUTPUT_LEN": 12,
-    "TRAIN_VAL_TEST_RATIO": [0.6, 0.2, 0.2],
-    "NORM_EACH_CHANNEL": False,
-    "RESCALE": True,
-    "NULL_VAL": 0.0
+    'INPUT_LEN': 12,
+    'OUTPUT_LEN': 12,
+    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+    'NORM_EACH_CHANNEL': False,
+    'RESCALE': True,
+    'METRICS': ['MAE', 'RMSE', 'MAPE'],
+    'NULL_VAL': 0.0
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/PEMS04/generate_training_data.py b/scripts/data_preparation/PEMS04/generate_training_data.py
index bd7374f2..ec3b9e3d 100644
--- a/scripts/data_preparation/PEMS04/generate_training_data.py
+++ b/scripts/data_preparation/PEMS04/generate_training_data.py
@@ -19,12 +19,13 @@
 domain = 'traffic flow'
 feature_description = [domain, 'time of day', 'day of week']
 regular_settings = {
-    "INPUT_LEN": 12,
-    "OUTPUT_LEN": 12,
-    "TRAIN_VAL_TEST_RATIO": [0.6, 0.2, 0.2],
-    "NORM_EACH_CHANNEL": False,
-    "RESCALE": True,
-    "NULL_VAL": 0.0
+    'INPUT_LEN': 12,
+    'OUTPUT_LEN': 12,
+    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+    'NORM_EACH_CHANNEL': False,
+    'RESCALE': True,
+    'METRICS': ['MAE', 'RMSE', 'MAPE'],
+    'NULL_VAL': 0.0
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/PEMS07/generate_training_data.py b/scripts/data_preparation/PEMS07/generate_training_data.py
index 73106860..559c1797 100644
--- a/scripts/data_preparation/PEMS07/generate_training_data.py
+++ b/scripts/data_preparation/PEMS07/generate_training_data.py
@@ -19,12 +19,13 @@
 domain = 'traffic flow'
 feature_description = [domain, 'time of day', 'day of week']
 regular_settings = {
-    "INPUT_LEN": 12,
-    "OUTPUT_LEN": 12,
-    "TRAIN_VAL_TEST_RATIO": [0.6, 0.2, 0.2],
-    "NORM_EACH_CHANNEL": False,
-    "RESCALE": True,
-    "NULL_VAL": 0.0
+    'INPUT_LEN': 12,
+    'OUTPUT_LEN': 12,
+    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+    'NORM_EACH_CHANNEL': False,
+    'RESCALE': True,
+    'METRICS': ['MAE', 'RMSE', 'MAPE'],
+    'NULL_VAL': 0.0
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/PEMS08/generate_training_data.py b/scripts/data_preparation/PEMS08/generate_training_data.py
index 7647676f..28a1e6e8 100644
--- a/scripts/data_preparation/PEMS08/generate_training_data.py
+++ b/scripts/data_preparation/PEMS08/generate_training_data.py
@@ -19,12 +19,13 @@
 domain = 'traffic flow'
 feature_description = [domain, 'time of day', 'day of week']
 regular_settings = {
-    "INPUT_LEN": 12,
-    "OUTPUT_LEN": 12,
-    "TRAIN_VAL_TEST_RATIO": [0.6, 0.2, 0.2],
-    "NORM_EACH_CHANNEL": False,
-    "RESCALE": True,
-    "NULL_VAL": 0.0
+    'INPUT_LEN': 12,
+    'OUTPUT_LEN': 12,
+    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+    'NORM_EACH_CHANNEL': False,
+    'RESCALE': True,
+    'METRICS': ['MAE', 'RMSE', 'MAPE'],
+    'NULL_VAL': 0.0
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/Pulse/generate_training_data.py b/scripts/data_preparation/Pulse/generate_training_data.py
index f20d6ac6..2e6ebe8f 100644
--- a/scripts/data_preparation/Pulse/generate_training_data.py
+++ b/scripts/data_preparation/Pulse/generate_training_data.py
@@ -14,12 +14,13 @@
 domain = 'simulated pulse data'
 feature_description = [domain]
 regular_settings = {
-    "INPUT_LEN": 336,
-    "OUTPUT_LEN": 336,
-    "TRAIN_VAL_TEST_RATIO": [0.7, 0.1, 0.2],
-    "NORM_EACH_CHANNEL": False,
-    "RESCALE": True,
-    "NULL_VAL": np.nan
+    'INPUT_LEN': 336,
+    'OUTPUT_LEN': 336,
+    'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+    'NORM_EACH_CHANNEL': False,
+    'RESCALE': True,
+    'METRICS': ['MAE', 'RMSE', 'MAPE'],
+    'NULL_VAL': np.nan
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/SD/generate_training_data.py b/scripts/data_preparation/SD/generate_training_data.py
index b6111f98..01d08eae 100644
--- a/scripts/data_preparation/SD/generate_training_data.py
+++ b/scripts/data_preparation/SD/generate_training_data.py
@@ -22,12 +22,13 @@
 domain = 'traffic flow'
 feature_description = [domain, 'time of day', 'day of week']
 regular_settings = {
-    "INPUT_LEN": 12,
-    "OUTPUT_LEN": 12,
-    "TRAIN_VAL_TEST_RATIO": [0.6, 0.2, 0.2],
-    "NORM_EACH_CHANNEL": False,
-    "RESCALE": True,
-    "NULL_VAL": 0.0
+    'INPUT_LEN': 12,
+    'OUTPUT_LEN': 12,
+    'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
+    'NORM_EACH_CHANNEL': False,
+    'RESCALE': True,
+    'METRICS': ['MAE', 'RMSE', 'MAPE'],
+    'NULL_VAL': 0.0
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/Traffic/generate_training_data.py b/scripts/data_preparation/Traffic/generate_training_data.py
index 49cdb1c7..dd855ab2 100644
--- a/scripts/data_preparation/Traffic/generate_training_data.py
+++ b/scripts/data_preparation/Traffic/generate_training_data.py
@@ -19,12 +19,13 @@
 domain = 'road occupancy rates'
 feature_description = [domain, 'time of day', 'day of week', 'day of week', 'day of year']
 regular_settings = {
-    "INPUT_LEN": 336,
-    "OUTPUT_LEN": 336,
-    "TRAIN_VAL_TEST_RATIO": [0.7, 0.1, 0.2],
-    "NORM_EACH_CHANNEL": True,
-    "RESCALE": False,
-    "NULL_VAL": np.nan
+    'INPUT_LEN': 336,
+    'OUTPUT_LEN': 336,
+    'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+    'NORM_EACH_CHANNEL': True,
+    'RESCALE': False,
+    'METRICS': ['MAE', 'MSE'],
+    'NULL_VAL': np.nan
 }
 
 def load_and_preprocess_data():
diff --git a/scripts/data_preparation/Weather/generate_training_data.py b/scripts/data_preparation/Weather/generate_training_data.py
index c43185d5..d531464e 100644
--- a/scripts/data_preparation/Weather/generate_training_data.py
+++ b/scripts/data_preparation/Weather/generate_training_data.py
@@ -19,12 +19,13 @@
 domain = 'weather'
 feature_description = [domain, 'time of day', 'day of week', 'day of week', 'day of year']
 regular_settings = {
-    "INPUT_LEN": 336,
-    "OUTPUT_LEN": 336,
-    "TRAIN_VAL_TEST_RATIO": [0.7, 0.1, 0.2],
-    "NORM_EACH_CHANNEL": True,
-    "RESCALE": False,
-    "NULL_VAL": np.nan
+    'INPUT_LEN': 336,
+    'OUTPUT_LEN': 336,
+    'TRAIN_VAL_TEST_RATIO': [0.7, 0.1, 0.2],
+    'NORM_EACH_CHANNEL': True,
+    'RESCALE': False,
+    'METRICS': ['MAE', 'MSE'],
+    'NULL_VAL': np.nan
 }
 
 def load_and_preprocess_data():