-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdemo_run.py
401 lines (337 loc) · 14.5 KB
/
demo_run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
def run_demo():
# IMPORTING OTHER FILES
import Import_Data_To_DB as import_data
import Create_Table_And_Import_Data_From_EDGAR as create_Table_and_Import
#
# IMPORTS FOR CHECKING IF DB IS UP TO DATE
import datetime as dt
#
# IMPORTS FOR EDGAR
from sec_api import QueryApi
import simplejson as json
#
#
# IMPORTS FOR SQL DB CONNECTION
import pymysql,os,requests,operator
print("Ready to connect to the database?")
yes = input()
CHANGE FIELDS TO YOUR OWN PERSONAL FIELDS
db = pymysql.connect(
host = 'your database hosting site',
user = 'your username',
password = 'your password',
db = 'Name of the database you are connecting to'
)
c = db.cursor()
Your_Table_Name = ""
print("Have you created the table yet? If not, type NO and press Enter.")
yes = str(input())
yes = yes.upper()
if(yes == "NO"):
# This will create the database table,
# and then import all holdings from the 13F Filing Data from each fund
# into the table
Your_Table_Name = create_Table_and_Import.run_create_and_import()
else:
Your_Table_Name = "All_Holdings_Raw_Data"
print("Table has been created and populated.")
#Find the date of the last available filing.
#@param date is the date on which the code is being run
def quarter(date):
if date.month == 1:
year = date.year - 1
return str(year) + '-09-30'
elif date.month == 2:
year = date.year - 1
if date.day < 16:
return str(year) + '-09-30'
return str(year) + '-12-31'
elif date.month == 3:
year = date.year - 1
return str(year) + '-12-31'
elif date.month == 4:
year = date.year - 1
return str(year) + '-12-31'
elif date.month == 5:
year = date.year - 1
if date.day < 16:
return str(year) + '-12-31'
return str(date.year) + '-03-31'
elif date.month == 6:
return str(date.year) + '-03-31'
elif date.month == 7:
return str(date.year) + '-03-31'
elif date.month == 8:
if date.day < 16:
return str(date.year) + '-03-31'
return str(date.year) + '-06-30'
elif date.month == 9:
return str(date.year) + '-06-30'
elif date.month == 10:
return str(date.year) + '-06-30'
elif date.month == 11:
if date.day < 16:
return str(date.year) + '-06-30'
return str(date.year) + '-09-30'
elif date.month == 12:
return str(date.year) + '-09-30'
last_quarter = quarter(dt.datetime.now())
two_ago = quarter(dt.datetime.now()-dt.timedelta(90))
#Check if database contains the most updated filings
#If not, add the newest data to the database
def check_if_db_is_updated():
sql = '''
SELECT * FROM HF_13f_filings.All_Holdings_Raw_Data
order by filingDate desc
limit 1
'''
c.execute(sql)
data=c.fetchall()
most_recent_date = []
if(quarter(dt.datetime.now()) != data[0][0]):
import_data.run_import(1)
print("Press Enter to continue.")
yes = input()
print("Now checking if the db is up to date.")
check_if_db_is_updated()
print("Now pulling the top 25 stocks per fund from db, each with: ")
print("value($), CIK, CUSIP, nameOfIssuer, filingDate, fundName, and ")
print(" the the amount of shares that were bought/sold of the stock between the most recent and its previous quarter by the fund.")
print("Press Enter to continue.")
yes = input()
create_temp_table = '''
Create temporary table sumValues
select sum(valueInDollars) as valSum,cik,cusip,filingDate,nameOfIssuer
from %s
where filingDate = "%s"
group by filingDate, cik, cusip;
''' % (Your_Table_Name, last_quarter)
create_second_temp_table = '''
Create temporary table sumValues2
select sum(valueInDollars) as valSum,cik,cusip,filingDate,nameOfIssuer
from %s
where filingDate in ("%s")
group by filingDate, cik, cusip;
''' % (Your_Table_Name, two_ago)
get_top_25_plus_turnover = '''
SELECT valSum,cik,cusip,nameOfIssuer,filingDate,name,rnk,totals
FROM (
SELECT s.valSum,s.cik,s.cusip,s.nameOfIssuer,s.filingDate,f.name, v.filingDate as vfilingDate, v.CIK as vCIK, v.valSum as vValSum, (s.valSum - ifnull(v.Valsum,0)) as totals , RANK() OVER (PARTITION BY s.CIK, s.filingDate ORDER BY s.valSum desc,s.nameOfIssuer) AS rnk
FROM sumValues as s
LEFT JOIN sumValues2 as v on (s.CIK = v.CIK and s.cusip=v.cusip and v.filingDate = "%s")
LEFT JOIN HF_Data as f on (s.CIK = f.CIK)
order by s.cik, rnk
) AS x
WHERE x.rnk <= 25;
''' % two_ago
drop_temp_table = "drop temporary table sumValues;"
c.execute(create_temp_table)
c.execute(create_second_temp_table)
c.execute(get_top_25_plus_turnover)
data = c.fetchall()
c.execute(drop_temp_table)
json_stocks_string = json.dumps(data)
json_stocks = (json.loads(json_stocks_string))
print("Now running our own formula to produce a list of the top 25 ranked stocks averaged over all funds.")
print("Press Enter to continue.")
yes = input()
fund_dict = {}
stock_dict = {}
best_stocks = []
# Assign a score to each stock based on its placement in each fund's top 25
for i in json_stocks:
i_cik = i[1]
i_rnk = i[6]
fund_dict[i_cik] = i_rnk
keys = list(fund_dict)
for i in json_stocks:
i_cusip = i[2]
stock_dict[i_cusip] = 0
for i in json_stocks:
i_cusip = i[2]
i_nameOfIssuer = i[3]
name_and_cusip = (i_cusip,i_nameOfIssuer)
i_rnk = i[6]
divisor = fund_dict[i[1]]
new_rank = int(i_rnk)/int(divisor)
# this is going to skew the rating by 0.04 because the rankings go from 1-25, which when divided by the max of 25, is .04-1
if(new_rank <= 0.2):
new_rank = 5
elif(0.2 < new_rank <= 0.4):
new_rank = 4
elif(0.4 < new_rank <= 0.6):
new_rank = 3
elif(0.6 < new_rank <= 0.8):
new_rank = 2
elif(0.8 < new_rank <= 1):
new_rank = 1
stock_dict[i_cusip] += new_rank
stocks = list(stock_dict)
for i in range(len(stock_dict)):
stock_dict[stocks[i]] /= 27
stock_dict = dict(sorted(stock_dict.items(), key=operator.itemgetter(1),reverse=True))
stocks = list(stock_dict)
for i in range(1,26):
best_stocks.append(stocks[i])
return_list = []
def validate_nine_chars(cusip):
if(cusip == "46090"):
return "46090E103"
zero = 0
while(len(cusip) < 9):
cusip = str(zero) + cusip
return cusip
# this is for transforming the CUSIP into a stock ticker.
for i in range(len(best_stocks)):
cusip = validate_nine_chars(best_stocks[i])
urlBeg = 'https://finnhub.io/api/v1/search?q='
urlEnd = '&token=c3rnfbaad3i88nmm0ai0'
fullUrl = str(urlBeg)+str(cusip)+str(urlEnd)
r = requests.get(fullUrl)
f = r.json()
temp = f.get('result')
temp2 = temp[0].get('symbol')
return_list.append(temp2)
print("Done running formula to produce the top 25 stocks averaged across all the funds.")
print("The list of stock tickers is: ")
print(return_list)
print("Press Enter to continue:")
yes = input()
print("Now running LSTM model on each stock ticker")
# IMPORTS FOR LSTM
# Import the libraries
# Warnings
import warnings
warnings.filterwarnings('ignore')
## math library for mathematical function
import math
## labeled data view in dataframe
import pandas as pd
## data reader od panda is used fetch the data from web
import pandas_datareader as web
## numpy is used to create multi dimensional array
import numpy as np
## tensorflow is uded to create DL model and wrapping the other libraries
import tensorflow as tf
## sklearn is providing ultility functions for standerdizing or scaling data
from sklearn.preprocessing import MinMaxScaler
## keras is a neural network library
from keras.layers import LSTM
from keras.layers import Dense
from keras.models import Sequential
# Runs the LSTM model on a stock ticker, using training data since January 1, 2018
# @param company is the stock ticker of the company being predicted
# @return the average percent error of actual vs. predicted results.
# percent error is given by 100 * (actual - predicted) / actual
def predict(company):
start = dt.datetime(2018,1,1)
end = dt.datetime.now()
data_frame = web.DataReader(company, 'yahoo', start, end)
d = quarter(end)
scaling_data_frame = data_frame.filter(['Open','High','Low','Close','Adj Close','Volume'])
## Scaling the features
scaler = MinMaxScaler(feature_range=(0,1))
## Transform the data into
scaled_Data = scaler.fit_transform(scaling_data_frame)
## Display the scaled features into dataframe
scaled_data_frame = pd.DataFrame(data=scaled_Data, columns=['Open','High','Low','Close','Adj Close','Volume'] )
## Create a seperate dataframe with only colse column
stock_close_data = data_frame.filter(['Close'])
## Convert created dataframe into numpy array
stock_close_dataset = stock_close_data.values
df1 = data_frame['2000-01-01':d]
trainingDataLength = len(df1)
## Scaling the data its come under preprocessing stage
## Create feature range into 0,1
scaler = MinMaxScaler(feature_range=(0,1))
## Transform the data into
scaledData = scaler.fit_transform(stock_close_dataset)
## Create a new dataset which contain scaled value
StockTrainData = scaledData[0:trainingDataLength , :]
## Spliting the dataset into two parts such as Xtrain and Ytrain datasets
Xtrain = []
Ytrain = []
for i in range(60, len(StockTrainData)):
Xtrain.append(StockTrainData[i-60:i, 0])
Ytrain.append(StockTrainData[i, 0])
## Convert Xtrain data, Ytrain data into numpy array
Xtrain = np.array(Xtrain)
Ytrain = np.array(Ytrain)
## Reshape the Xtrain data (number of column and number of row)
Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], Xtrain.shape[1], 1))
## Develop LSTM model
model = Sequential()
## Assign neurons as 50
neurons = 50
## First LSTM layer
model.add(LSTM(neurons, return_sequences=True, input_shape= (Xtrain.shape[1], 1)))
## Second LSTM layer, no more layer for lstm so return_sequence is false
model.add(LSTM(neurons, return_sequences= False))
## Adding Dense layer which always have 25 neurons by default
model.add(Dense(25))
model.add(Dense(1))
##Compile model
## mse= mean squared error
model.compile(optimizer='adam', loss='mse')
# ## Fiting model with given training dataset
history_data = model.fit(Xtrain, Ytrain, batch_size=50, epochs=50, verbose=2, validation_split=0.2)
##Create testing dataset, new array which contains scaled value from 2275 out of 2843
testingData = scaledData[trainingDataLength - 60: , :]
## Create dataset Xtest and Ytest
Xtest = []
Ytest = stock_close_dataset[trainingDataLength:, :]
for i in range(60, len(testingData)):
Xtest.append(testingData[i-60:i, 0])
## Convert data into numpy array
Xtest = np.array(Xtest)
## Reshape data from 2 Dimensional to 3 Dimensional
Xtest = np.reshape(Xtest, (Xtest.shape[0], Xtest.shape[1], 1 ))
## Get predicted stock price value
## Unscaling the predicted value
predictions = model.predict(Xtest)
predictions = scaler.inverse_transform(predictions)
## Ploting data to graph train and validation
training = stock_close_data[:trainingDataLength]
validation = stock_close_data[trainingDataLength:]
validation['Predictions'] = predictions
error = 100*np.mean(Ytest-predictions)/len(Ytest)
return round(error, 3)
results = []
for s in return_list:
results.append({s, predict(s)})
## THIS SECTION HERE ADDS THE DATA TO AN EXCEL SHEET AND CREATES IT IN THE SAME FOLDER
print("completed LSTM modeling")
print("Starting transfer to file named \"Stock_Analysis_Results.xlsx\"")
# TOP 25 STOCKS OF EACH FUND TO A NEW SHEET IN THE EXCEL FILE
df = pd.DataFrame(json_stocks)
writer = pd.ExcelWriter('Stock_Analysis_Results.xlsx', engine='xlsxwriter')
df = df.rename(columns = {0:"current Value (In Dollars)",1:"CIK", 2:"CUSIP", 3:"StockName", 4:"FilingDate", 5:"FundName", 6:"Ranked(By Total Value)",7:"Share Change Since Last Quarter"})
df.to_excel(writer, sheet_name='TOP_25_STOCKS_OF_EACH_FUND')
# TOP 25 STOCKS AVERAGED OVER ALL FUNDS
# PLUS PREDICTION ERROR AND SHARE PRICE DIFFERENCE BETWEEN MOST RECENT QUARTER & ITS PREVIOUS ONE
list_results = []
for stock_pred in results:
temp_list = []
stock_pred = list(stock_pred)
stock_pred.sort(key = lambda item: ([str,np.float64].index(type(item)), item))
temp_list.append(stock_pred[0])
temp_list.append(stock_pred[1])
start = quarter(dt.datetime.now())
end = dt.datetime.now()
company = stock_pred[0]
data_frame = web.DataReader(company, 'yahoo', start,end)
EOQ = data_frame['Close'].values[0]
current = data_frame['Close'].values[len(data_frame)-1]
temp_list.append(EOQ.round(decimals=2))
temp_list.append(current.round(decimals=2))
diff_dol = current - EOQ
diff_per = current/EOQ * 100
temp_list.append(diff_dol.round(decimals=2))
temp_list.append(diff_per.round(decimals=2))
list_results.append(temp_list)
df2 = pd.DataFrame(list_results)
df2 = df2.rename(columns = {0:"Stock Ticker", 1:"Prediction Error", 2:"Closing Share Price on Date of Last Filing", 3: "Latest Share Price for Today", 4:"Change in Share Price($)", 5:"Change in Share Price(%)"})
df2.to_excel(writer, sheet_name = 'Top25AvgOverAllFunds')
writer.save()
c.close()
print("Finished Program. Please check working directory for result file \"Stock_Analysis_Results.xlsx\"")