-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
448 lines (362 loc) · 16.7 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
# PROSPECTION Version 1.1
# Last updated September 14, 2020
# Author: Mike Ferguson
# See Repo for more details.
# Please Read the README
# -------------------------------------------------------------------------------------------------------------------
# main.py - contains the driver code for predicting stock opening prices 2 days from now
# based on: https://medium.com/@randerson112358/stock-price-prediction-using-python-machine-learning-e82a039ac2bb
# Most of core code came from above source. This program adapts it into short term predictions and also puts it inside
# a wrapper program to interact with, with the addition of different modes and multi-stock prediction.
# Program predicts opening price two days in advance. Here is how to use:
# Run model after hours on current day (today), after the prices have stabilized - sometime between 6pm and 9:00am EST
# Stocks will be queued to buy on market open at 9:30 AM. Once 9:30 hits, stocks will be bought.
# The day will play out and markets will close at 4:00 and after hours at 6:00pm.
# Sell stocks next day at market open at predicted market open price.
#
# EXAMPLE:
# Evening, 1/1/2020: Run Model after Hours to see what stocks will go up by morning of 1/3/2020. Stocks are queued
# Morning, 1/2/2020: Stocks are bought at open prices. Re-Run program for two days from now.
# Evening, 1/2/2020: Stocks have their day.
# Morning, 1/3/2020: Sell stocks at predicted open price. Repeat each day. Make Bank.
#
# -------------------------------------------------------------------------------------------------------------------
# Import statements
# Environment and warning configuration comes first on purpose:
# TF_CPP_MIN_LOG_LEVEL is only honoured if it is set before keras (and with it
# TensorFlow) is imported, and the FutureWarning filter has to be installed
# before the libraries that emit those warnings are loaded.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import random
import time
import math
from itertools import chain
from datetime import datetime, timedelta
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import matplotlib.pyplot as plt
import matplotlib.backends.backend_pdf
# Needed Functions
# ---------------------------------------------------------------------------------------------------------------------
# up_down(df): tallies how many rows of a results table are predicted gains
# versus predicted losses.
def up_down(df):
    """Count predicted gainers and losers in a results table.

    Parameters:
        df: DataFrame with a "Predicted Profit" column, one row per prediction.

    Returns:
        A tuple (ups, downs): the number of rows whose predicted profit is
        greater than or equal to zero, and the number of all remaining rows.
        A predicted profit of exactly 0 is counted as an "up".
    """
    gain_count = sum(
        1 for _, entry in df.iterrows() if entry["Predicted Profit"] >= 0
    )
    # Every row that is not a gain is a loss, so the rest of the table is "down".
    return gain_count, len(df) - gain_count
# predict_open: trains an LSTM on one ticker's price history and forecasts an
# upcoming opening price from the most recent closes.
def predict_open(stock, save_graphs, verbose, to_verify):
    """Train an LSTM on a ticker's history and predict an upcoming open price.

    Daily data from 2012-01-01 through today is downloaded for ``stock``. Each
    training sample is the previous ``window`` (2) closing prices and its
    target is the opening price of the row that follows them. The first 80% of
    the rows are used for training and the remaining rows for scoring.
    NOTE(review): the file header describes this as a two-day-ahead forecast;
    the target used here is the open of the row directly after the window.

    Parameters:
        stock: ticker symbol handed to the data reader.
        save_graphs: when true, draw a train / validation / prediction figure.
            The figure is left open on pyplot for the caller to export.
        verbose: when true, print progress messages and the model summary.
        to_verify: when true, train for 10 epochs instead of 3.

    Returns:
        A 7-tuple ``(stock, old_close, predicted_open, predicted_profit,
        nrmse, predicted_move_percent, plt)``. ``old_close`` is the most recent
        close in the downloaded data, ``predicted_profit`` is the predicted
        open minus that close, ``nrmse`` is the test RMSE divided by the mean
        test open, and ``plt`` is the ``matplotlib.pyplot`` module itself.

    Raises:
        ValueError: if the history is too short to build at least one training
            window and one test row.
    """
    window = 2  # number of past closes fed to the model per sample
    todays_date = datetime.date(datetime.now())

    if verbose:
        print('*** Fetching Most Up-To-Date Stock Information...')
    df = web.DataReader(stock, data_source='yahoo', start='2012-01-01', end=todays_date)
    data = df.filter(['Close'])
    data_y = df.filter(['Open'])
    dataset = data.values
    dataset_y = data_y.values
    training_data_len = math.ceil(len(dataset) * .80)

    if training_data_len <= window or training_data_len >= len(dataset):
        raise ValueError(
            "Not enough price history for " + str(stock)
            + " to build training and test sets."
        )

    # Inputs (Close) and targets (Open) get their own scaler. Re-fitting one
    # shared scaler on the targets would leave it holding the Open range, so
    # the final Close window would be scaled differently from the training
    # inputs.
    # NOTE(review): both scalers are fitted on the full history, including the
    # held-out rows, exactly as before.
    close_scaler = MinMaxScaler(feature_range=(0, 1))
    open_scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = close_scaler.fit_transform(dataset)
    scaled_data_y = open_scaler.fit_transform(dataset_y)
    if verbose:
        print('*** Data Fetched. Training will commence...')

    # Build the training windows: `window` closes in, the following open out.
    train_data = scaled_data[0:training_data_len, :]
    train_data_y = scaled_data_y[0:training_data_len, :]
    x_train = np.array(
        [train_data[i - window:i, 0] for i in range(window, len(train_data))]
    )
    y_train = np.array(
        [train_data_y[i, 0] for i in range(window, len(train_data))]
    )
    # The LSTM expects (samples, timesteps, features).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

    # Model definition: two stacked LSTM layers followed by dense layers.
    model = Sequential()
    model.add(LSTM(units=128, return_sequences=True, input_shape=(x_train.shape[1], 1)))
    model.add(Dropout(0.20))
    model.add(LSTM(units=64, return_sequences=False))
    model.add(Dropout(0.20))
    model.add(Dense(units=32))
    model.add(Dense(units=16))
    model.add(Dense(units=1))
    if verbose:
        model.summary()
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Verification runs train longer than regular runs.
    epochs = 10 if to_verify else 3
    model.fit(x_train, y_train, batch_size=1, epochs=epochs)

    # Test windows start `window` rows early so the first test row has inputs.
    test_data = scaled_data[training_data_len - window:, :]
    y_test = dataset_y[training_data_len:, :]
    x_test = np.array(
        [test_data[i - window:i, 0] for i in range(window, len(test_data))]
    )
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    # Predict and map the outputs back to prices with the target scaler.
    predictions = open_scaler.inverse_transform(model.predict(x_test))

    # Optional shift: a positive value nudges predictions up (model is
    # under-predicting), a negative value nudges them down.
    shift = 0.000025
    pred = predictions + (predictions * shift)

    # RMSE on the test rows, normalised by the mean test open.
    rmse = np.sqrt(np.mean(((pred - y_test) ** 2))) / np.mean(y_test)

    # Data for the graph.
    train = data_y[:training_data_len]
    valid2 = data_y[training_data_len:].copy(deep=True)
    valid2['Predictions'] = pred

    if save_graphs:
        if verbose:
            print('*** Creating and Exporting Graph...')
        plt.figure(figsize=(16, 8))
        plt.title(stock + " Open Price in Two Days, Based on entire History")
        plt.xlabel('Date', fontsize=18)
        plt.ylabel('Open Price USD ($)', fontsize=18)
        plt.plot(train['Open'])
        plt.plot(valid2[['Open', 'Predictions']])
        plt.legend(['Train', 'Val', 'Predictions'], loc='lower left')
        if verbose:
            print('*** Graph Created and Saved...')

    if verbose:
        print('*** Summarizing Results...')

    # Forecast from the most recent `window` closes. The frame downloaded
    # above is reused; a second identical download is not needed.
    last_days = data[-window:].values
    last_days_scaled = close_scaler.transform(last_days)
    final_input = np.reshape(last_days_scaled, (1, window, 1))
    pred_open = open_scaler.inverse_transform(model.predict(final_input))

    # Most recent close available in the data.
    old_close = df['Close'].iloc[-1]

    pred_profit = pred_open[0][0] - old_close
    pred_mvmt_perc = round((pred_profit / old_close) * 100, 5)
    if verbose:
        print('*** Done.')
    return stock, old_close, round(pred_open[0][0], 3), round(pred_profit, 3), rmse, pred_mvmt_perc, plt
# ---------------------------------------------------------------------------------------------------------------------
# Make Table method.
# Runs predict_open for every ticker in a list, prints the results table and
# stores it on disk.
def make_table(the_list, to_verify):
    """Predict every ticker in ``the_list`` and report the results.

    Parameters:
        the_list: list of ticker symbols; one prediction is made per entry.
        to_verify: forwarded to predict_open as its ``to_verify`` flag.

    Returns:
        A list with the last element of each predict_open result (the plot
        handle), one entry per ticker.

    Side effects:
        Prints the table filtered by the RMSE cutoff and the average RMSE, and
        pickles the full (unfiltered) table, sorted by profit percentage, to
        "stock_report.txt".
    """
    recent_closes, predicted_opens, profits = [], [], []
    move_percents, errors, plots = [], [], []

    for position, ticker in enumerate(the_list, start=1):
        print("------------------------------------------------------------")
        print("Predicting: ", ticker, "(Stock Number: ", str(position) + ")")
        # Positional flags are: save_graphs=True, verbose=False, to_verify.
        outcome = predict_open(ticker, True, False, bool(to_verify))
        _, last_close, next_open, profit, rmse, move_percent, graph = outcome
        recent_closes.append(last_close)
        predicted_opens.append(next_open)
        profits.append(profit)
        errors.append(rmse)
        move_percents.append(move_percent)
        plots.append(graph)

    # Assemble the report, one column at a time in display order.
    df_pred = pd.DataFrame(the_list, columns=['Stock'])
    # NOTE: despite its label, this column holds the most recent close.
    df_pred['Today Open '] = recent_closes
    df_pred['Predicted Open in Two days'] = predicted_opens
    df_pred['Predicted Profit'] = profits
    df_pred['Profit Percentage'] = move_percents
    for share_count in (5, 10, 20, 50, 100, 500, 1000):
        label = "Predicted Profit @ " + str(share_count) + " Shares"
        df_pred[label] = df_pred['Predicted Profit'] * share_count
    df_pred['Test RMSE'] = errors

    # Let pandas print the whole frame instead of truncating it.
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)

    # Full result, best predicted percentage first.
    result = df_pred.sort_values(by='Profit Percentage', ascending=False)

    # IMPORTANT: only rows whose RMSE is at or below the cutoff are displayed.
    cutoff = 0.075
    print(result[result['Test RMSE'] <= cutoff])

    avg_rmse = round(df_pred["Test RMSE"].mean(), 5)
    print("Average Model RMSE: ", avg_rmse)

    # The file has a .txt name but holds a pickled DataFrame.
    print("Saving DataFrame with relevant information to stock_report.txt")
    result.to_pickle("stock_report.txt")
    print("Results Saved.")
    return plots
# ---------------------------------------------------------------------------------------------------------------------
# verify method.
# Predicts every requested ticker several times and reports how consistently
# the model calls it "up".
def verify(the_list):
    """Run repeated predictions per ticker and print a verification report.

    Every ticker in ``the_list`` is predicted ``runs`` (5) times through
    make_table with the verify flag set. The table make_table wrote to
    "stock_report.txt" is then read back and summarised per ticker: its rows,
    the up/down counts, the share of "up" runs and the average RMSE. A ticker
    is marked "yes" for investing when at least 80% of its runs were "up".

    Parameters:
        the_list: list of ticker symbols to verify.

    Returns:
        The list of plot handles returned by make_table.
    """
    print("Verifying stocks entered...")
    runs = 5
    # Repeat each ticker `runs` times, keeping the repeats next to each other.
    repeated = [ticker for ticker in the_list for _ in range(runs)]
    graphs = make_table(repeated, True)
    # Reads back the pickle make_table has just written; do not point this at
    # a file from an untrusted source.
    new_df = pd.read_pickle("stock_report.txt")

    for stock in the_list:
        print("--------------------------------------------------------")
        print("Verification Report for " + str(stock) + ": ")
        rslt_df = new_df[new_df['Stock'] == stock]
        print(rslt_df)
        avg_rmse = round(rslt_df["Test RMSE"].mean(), 4)
        ups, downs = up_down(rslt_df)
        total = ups + downs
        # Guard the division in case no rows were found for this ticker.
        up_prop = round(ups / total, 3) if total else 0.0
        print(str(ups) + " ups and " + str(downs) + " downs ")
        # up_prop is a 0-1 proportion; scale it so the "%" suffix is correct.
        print("Up Percentage: " + str(round(up_prop * 100, 1)) + "%")
        print("Average Model RMSE for " + str(stock) + ": " + str(avg_rmse))
        to_invest = "yes" if up_prop >= 0.80 else "no"
        print(" **** BOTTOM LINE (INVEST YES/NO): ", to_invest, "***")
    return graphs
# ---------------------------------------------------------------------------------------------------------------------
# call_methods method.
# Builds the list of tickers for the requested mode and dispatches to
# make_table or verify.
def call_methods(mode, stock, how_many):
    """Resolve the ticker list for ``mode`` and run the matching workflow.

    Parameters:
        mode: "one stock", "verify" or "run" (case-insensitive). Any other
            value results in an empty ticker list.
        stock: ticker symbols for "one stock" / "verify" mode. An iterable of
            symbols, or a single symbol given as a string.
        how_many: number of random tickers to draw in "run" mode; converted
            with int().

    Returns:
        The plot handles returned by verify (in "verify" mode) or by
        make_table (all other modes).
    """
    mode_key = mode.lower()

    if mode_key in ("one stock", "verify"):
        if isinstance(stock, str):
            # A bare string is one ticker, not a sequence of characters.
            the_list = [stock] if stock else []
        else:
            the_list = list(stock)
    elif mode_key == "run":
        # The symbol file is only needed for random selection, so the other
        # modes keep working when it is absent.
        symbols = pd.read_csv("sp500.csv")['Symbol'].tolist()
        # Drawn with replacement, so the same ticker can appear more than once.
        the_list = [random.choice(symbols) for _ in range(int(how_many))]
    else:
        the_list = []

    if mode_key == "verify":
        return verify(the_list)
    return make_table(the_list, False)
# ---------------------------------------------------------------------------------------------------------------------
# Code to provide user interaction and separation into multiple modes.
def _prompt_for_tickers():
    """Ask for a comma-separated ticker list and return the non-empty symbols."""
    raw = input("Enter Stock Tickers to predict, separated by a comma, like this: TWTR,AAPL,GE")
    return [symbol for symbol in raw.replace(" ", "").split(",") if symbol]


def _export_figures(path="predicted_graphs.pdf"):
    """Save every open matplotlib figure into one PDF, then close the figures."""
    # The context manager closes the PDF even if saving a page fails.
    with matplotlib.backends.backend_pdf.PdfPages(path) as pdf:
        for fig_num in plt.get_fignums():
            pdf.savefig(fig_num)
    # Close the figures so a later pass in the same session starts from a
    # clean slate instead of exporting stale graphs again.
    plt.close("all")


def main():
    """Interactive loop: ask for a mode and run it until the user quits.

    Modes: "r" predicts a number of randomly chosen tickers, "i" predicts the
    tickers the user enters, "v" runs the verification report for the entered
    tickers and then ends the program, "q" quits. Run and individual modes
    export their graphs to "predicted_graphs.pdf".
    """
    while True:
        mode = input("Which mode would you like to Enter? Run(r), Individual Stocks(i), verify(v), quit(q)")
        mode = mode.strip().lower()
        print("--------------------------------------------------------")

        if mode == "r":
            print("Entered Run Mode.")
            answer = input("How Many stocks from the NYSE list would you like to predict? (randomly chosen)")
            try:
                how_many = int(answer)
            except ValueError:
                how_many = 0
            if how_many < 1:
                # Non-numeric or non-positive counts go back to the mode prompt.
                print("Unrecognized Input, please try again.")
                continue
            call_methods("run", "", how_many)
            _export_figures()
        elif mode == "i":
            print("Entered Individual Stocks Mode.")
            which_stocks = _prompt_for_tickers()
            if not which_stocks:
                print("Unrecognized Input, please try again.")
                continue
            call_methods("one stock", which_stocks, 0)
            _export_figures()
        elif mode == "v":
            print("Entered Verfication Mode")
            which_stocks = _prompt_for_tickers()
            if not which_stocks:
                print("Unrecognized Input, please try again.")
                continue
            call_methods("verify", which_stocks, 0)
            # Verification ends the session after one report.
            return
        elif mode == "q":
            print("Process Quit.")
            return
        else:
            print("Unrecognized Input, please try again.")
# ---------------------------------------------------------------------------------------------------------------------
# call main method only when this file is run as a script, so that importing
# the module does not start the interactive prompt
if __name__ == "__main__":
    main()