Skip to content

Commit

Permalink
add Gaussian noise simulator
Browse files Browse the repository at this point in the history
  • Loading branch information
zezhishao committed May 21, 2024
1 parent 318f223 commit f80b566
Show file tree
Hide file tree
Showing 2 changed files with 148 additions and 0 deletions.
122 changes: 122 additions & 0 deletions scripts/data_preparation/Gaussian/generate_training_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import os
import sys
import shutil
import pickle
import argparse

import numpy as np

# TODO: remove it when basicts can be installed by pip
sys.path.append(os.path.abspath(__file__ + "/../../../.."))
from basicts.data.transform import standard_transform


def generate_data(args: argparse.Namespace):
    """Preprocess and generate train/valid/test datasets.

    Reads the raw time series from ``args.data_file_path``, builds sliding-window
    sample indices, normalizes the series with statistics of the training split,
    and pickles both the index lists and the processed data into
    ``args.output_dir``.

    Args:
        args (argparse.Namespace): configurations of preprocessing
    """

    target_channel = args.target_channel
    future_seq_len = args.future_seq_len
    history_seq_len = args.history_seq_len
    output_dir = args.output_dir
    train_ratio = args.train_ratio
    valid_ratio = args.valid_ratio
    data_file_path = args.data_file_path
    norm_each_channel = args.norm_each_channel
    # if evaluate on rescaled data. see `basicts.runner.base_tsf_runner.BaseTimeSeriesForecastingRunner.build_train_dataset` for details.
    if_rescale = not norm_each_channel

    # read data; expected shape is (seq_len, num_nodes, num_channels)
    data = np.load(data_file_path)
    data = data[..., target_channel]
    print("raw time series shape: {0}".format(data.shape))

    # split data: one sample per sliding-window position
    seq_len = data.shape[0]
    num_samples = seq_len - (history_seq_len + future_seq_len) + 1
    train_num = round(num_samples * train_ratio)
    valid_num = round(num_samples * valid_ratio)
    test_num = num_samples - train_num - valid_num
    print("number of training samples:{0}".format(train_num))
    print("number of validation samples:{0}".format(valid_num))
    print("number of test samples:{0}".format(test_num))

    # each index is (history_start, target_start, target_end)
    index_list = []
    for t in range(history_seq_len, num_samples + history_seq_len):
        index = (t - history_seq_len, t, t + future_seq_len)
        index_list.append(index)

    train_index = index_list[:train_num]
    valid_index = index_list[train_num: train_num + valid_num]
    test_index = index_list[train_num + valid_num: train_num + valid_num + test_num]

    # normalize data using training-split statistics only
    scaler = standard_transform
    data_norm = scaler(data, output_dir, train_index, history_seq_len, future_seq_len, norm_each_channel=norm_each_channel)

    # add temporal feature (none for this synthetic dataset)
    feature_list = [data_norm]

    processed_data = np.concatenate(feature_list, axis=-1)

    # save sample indices
    index = {}
    index["train"] = train_index
    index["valid"] = valid_index
    index["test"] = test_index
    index_file = os.path.join(output_dir, "index_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale))
    with open(index_file, "wb") as f:
        pickle.dump(index, f)

    # save processed data
    data = {}
    data["processed_data"] = processed_data
    data_file = os.path.join(output_dir, "data_in_{0}_out_{1}_rescale_{2}.pkl".format(history_seq_len, future_seq_len, if_rescale))
    with open(data_file, "wb") as f:
        pickle.dump(data, f)


if __name__ == "__main__":
    # sliding window size for generating history sequence and target sequence
    HISTORY_SEQ_LEN = 96
    FUTURE_SEQ_LEN = 96

    TRAIN_RATIO = 0.6
    VALID_RATIO = 0.2
    TARGET_CHANNEL = [0]  # target channel(s)

    DATASET_NAME = "Gaussian"

    OUTPUT_DIR = "datasets/" + DATASET_NAME
    DATA_FILE_PATH = "datasets/raw_data/{0}/{0}.npy".format(DATASET_NAME)

    parser = argparse.ArgumentParser()
    parser.add_argument("--output_dir", type=str,
                        default=OUTPUT_DIR, help="Output directory.")
    parser.add_argument("--data_file_path", type=str,
                        default=DATA_FILE_PATH, help="Raw traffic readings.")
    parser.add_argument("--history_seq_len", type=int,
                        default=HISTORY_SEQ_LEN, help="History sequence length.")
    parser.add_argument("--future_seq_len", type=int,
                        default=FUTURE_SEQ_LEN, help="Future sequence length.")
    # NOTE: `type=list` would split a CLI string into single characters;
    # `nargs="+"` parses e.g. `--target_channel 0 1` into [0, 1].
    parser.add_argument("--target_channel", type=int, nargs="+",
                        default=TARGET_CHANNEL, help="Selected channels.")
    parser.add_argument("--train_ratio", type=float,
                        default=TRAIN_RATIO, help="Train ratio")
    parser.add_argument("--valid_ratio", type=float,
                        default=VALID_RATIO, help="Validate ratio.")
    # value is overwritten below; the flag exists only for interface completeness
    parser.add_argument("--norm_each_channel", type=float,
                        help="Normalize each channel separately (overridden below).")
    args = parser.parse_args()

    # print args
    print("-" * (20 + 45 + 5))
    for key, value in sorted(vars(args).items()):
        print("|{0:>20} = {1:<45}|".format(key, str(value)))
    print("-" * (20 + 45 + 5))

    os.makedirs(args.output_dir, exist_ok=True)
    # generate the dataset under both normalization settings
    args.norm_each_channel = True
    generate_data(args)
    args.norm_each_channel = False
    generate_data(args)
26 changes: 26 additions & 0 deletions scripts/data_preparation/Gaussian/simulate_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
# switch the working directory to the project root so the relative
# `datasets/...` paths used below resolve regardless of invocation location
PROJECT_DIR = os.path.abspath(__file__ + "/../../../..")
os.chdir(PROJECT_DIR)


def generate_gaussian_noise_sequence(duration, mean=0.0, std=1.0):
    """Generate a Gaussian (white) noise time series.

    Args:
        duration (int): time series length (number of samples).
        mean (float): mean of the Gaussian distribution. Defaults to 0.0.
        std (float): standard deviation of the Gaussian distribution. Defaults to 1.0.

    Returns:
        tuple: ``(time_points, noise)`` where ``time_points`` is
            ``np.arange(duration)`` and ``noise`` has shape ``(duration,)``
            with samples drawn i.i.d. from N(mean, std**2).
    """
    time_points = np.arange(0, duration, 1)
    gaussian_noise_sequence = np.random.normal(mean, std, duration)
    return time_points, gaussian_noise_sequence

# hyper parameters
duration = 10000  # time series length

# generate gaussian sequence
time_points, gaussian_noise_sequence = generate_gaussian_noise_sequence(duration)

# save gaussian noise sequence with shape (L, N, C) = (duration, 1, 1);
# cast to float32 to keep the dtype the previous torch.Tensor round-trip produced
data = gaussian_noise_sequence.astype(np.float32)[:, np.newaxis, np.newaxis]
os.makedirs('datasets/raw_data/Gaussian', exist_ok=True)
np.save('datasets/raw_data/Gaussian/Gaussian.npy', data)

0 comments on commit f80b566

Please sign in to comment.