-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSARIMAX.py
117 lines (89 loc) · 3.95 KB
/
SARIMAX.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import pandas as pd
import matplotlib.pyplot as plt
import requests
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import warnings
warnings.filterwarnings("ignore")
# Fetching data from the server
# url = "https://web-api.coinmarketcap.com/v1/cryptocurrency/ohlcv/historical"
# param = {"convert":"USD","slug":"bitcoin","time_end":"1601510400","time_start":"1367107200"}
# content = requests.get(url=url, params=param).json()
# df = pd.json_normalize(content['data']['quotes'])
# # Extracting and renaming the important variables
# df['Date']=pd.to_datetime(df['quote.USD.timestamp']).dt.tz_localize(None)
# df['Low'] = df['quote.USD.low']
# df['High'] = df['quote.USD.high']
# df['Open'] = df['quote.USD.open']
# df['Close'] = df['quote.USD.close']
# df['Volume'] = df['quote.USD.volume']
# # Drop original and redundant columns
# df=df.drop(columns=['time_open','time_close','time_high','time_low', 'quote.USD.low', 'quote.USD.high', 'quote.USD.open', 'quote.USD.close', 'quote.USD.volume', 'quote.USD.market_cap', 'quote.USD.timestamp'])
df = pd.read_csv('D:\\cr\\BTC\\excel\\BTSdategropby.csv', encoding='ISO-8859-1')
# Creating a new feature for better representing day-wise values
df['Mean'] = (df['Low'] + df['High'])/2
# Cleaning the data for any NaN or Null fields
df = df.dropna()
# Creating a copy for making small changes
dataset_for_prediction = df.copy()
dataset_for_prediction['Actual']=dataset_for_prediction['Mean'].shift()
dataset_for_prediction=dataset_for_prediction.dropna()
# date time typecast
dataset_for_prediction['Date'] =pd.to_datetime(dataset_for_prediction['Date'])
dataset_for_prediction.index= dataset_for_prediction['Date']
# normalizing the exogeneous variables
from sklearn.preprocessing import MinMaxScaler
sc_in = MinMaxScaler(feature_range=(0, 1))
scaled_input = sc_in.fit_transform(dataset_for_prediction[['Low', 'High', 'Open', 'Close', 'Volume', 'Mean']])
scaled_input = pd.DataFrame(scaled_input, index=dataset_for_prediction.index)
X=scaled_input
X.rename(columns={0:'Low', 1:'High', 2:'Open', 3:'Close', 4:'Volume', 5:'Mean'}, inplace=True)
print("Normalized X")
print(X.head())
# normalizing the time series
sc_out = MinMaxScaler(feature_range=(0, 1))
scaler_output = sc_out.fit_transform(dataset_for_prediction[['Actual']])
scaler_output =pd.DataFrame(scaler_output, index=dataset_for_prediction.index)
y=scaler_output
y.rename(columns={0:'BTC Price next day'}, inplace= True)
y.index=dataset_for_prediction.index
print("Normalized y")
print(y.head())
# train-test split (cannot shuffle in case of time series)
train_size=int(len(df) *0.9)
test_size = int(len(df)) - train_size
train_X, train_y = X[:train_size].dropna(), y[:train_size].dropna()
test_X, test_y = X[train_size:].dropna(), y[train_size:].dropna()
# Init the best SARIMAX model
from statsmodels.tsa.statespace.sarimax import SARIMAX
model = SARIMAX(
train_y,
exog=train_X,
order=(0,1,1),
seasonal_order =(0, 0, 1, 12),
enforce_invertibility=False,
enforce_stationarity=False
)
# training the model
results = model.fit()
# get predictions
predictions = results.predict(start =train_size, end=train_size+test_size-2,exog=test_X)
# setting up for plots
act = pd.DataFrame(scaler_output.iloc[train_size:, 0])
predictions=pd.DataFrame(predictions)
predictions.reset_index(drop=True, inplace=True)
predictions.index=test_X.index
predictions['Actual'] = act['BTC Price next day']
predictions.rename(columns={0:'Pred', 'predicted_mean':'Pred'}, inplace=True)
# post-processing inverting normalization
testPredict = sc_out.inverse_transform(predictions[['Pred']])
testActual = sc_out.inverse_transform(predictions[['Actual']])
# prediction plots
plt.figure(figsize=(20,10))
plt.plot(predictions.index, testActual, label='Pred', color='blue')
plt.plot(predictions.index, testPredict, label='Actual', color='red')
plt.legend()
plt.show()
# print RMSE
from statsmodels.tools.eval_measures import rmse
print("RMSE:",rmse(testActual, testPredict))