-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathImport_Data_To_DB.py
105 lines (96 loc) · 3.4 KB
/
Import_Data_To_DB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#This function queries the hedge fund data from a list of CIKs
#Downloads the data as .json files (one per fund), reads the data from the files and puts it into the database.
#@param number_of_quarters is the number of most recent quarters for which data should be pulled.
def run_import(number_of_quarters):
# IMPORTS FOR CHECKING IF DB IS UP TO DATE
import datetime as dt
#
# IMPORTS FOR EDGAR
from sec_api import QueryApi
# IMPORTS FOR SQL DB CONNECTION
import pymysql
CHANGE FIELDS TO YOUR OWN PERSONAL FIELDS
db = pymysql.connect(
host = 'your database hosting site',
user = 'your username',
password = 'your password',
db = 'Name of the database you are connecting to'
)
c = db.cursor()
#ENTER YOUR OWN QUERY API
queryApi = QueryApi(api_key="Copy your query API here")
cik = [
"1401388",
"1029160",
"1423053",
"1736225",
"1294571",
"1603466",
"1569049",
"1439289",
"1410830",
"1389507",
"1343781",
"1350694",
"1317679",
"1318757",
"1061165",
"1092838",
"1280493",
"1273087",
"1218710",
"1166309",
"1167483",
"1136704",
"1135730",
"1106500",
"1040198",
"1040273",
"1103804",
"934639",
"1009207"
]
all_filings = []
for a in range(len(cik)):
#Download the data for the funds identified by each CIK
fund_cik = cik[a]
print("Starting with fund: " + str(fund_cik))
query = {
"query": {
"query_string": {
"query": "cik: "+ str(fund_cik) + " AND formType:13F-HR"
}
},
"from": "0",
"size": str(number_of_quarters),
"sort": [{"filedAt": {"order": "desc"}}]
} #% str(number_of_quarters)
print(query.get('query').get('query_string').get('query') + "fund_cik is: " + str(fund_cik))
filings = queryApi.get_filings(query)
#These next few lines are to prevent double filings with a different fund which was occuring amongst a few of the funds
set_size = 0
for i in filings.get('filings'):
if(set_size > 4):
break
set_size += 1
if(i.get('cik') != fund_cik):
continue
if(i.get('holdings') == None):
continue
if(i.get('periodOfReport') == None):
continue
#Input the downloaded data into the database.
filingDate = i.get('periodOfReport')
cik_of_fund = i.get('cik')
fund_name = i.get('companyNameLong')
for j in i.get('holdings'):
all_filings.append(j)
s = j.get('shrsOrPrnAmt')
shares = str(s.get('sshPrnamt'))
value = str(j.get('value'))
cusip = str(j.get('cusip'))
nameOfIssuer = str(j.get('nameOfIssuer'))
sql = "INSERT INTO All_Holdings_Raw_Data (cusip, nameOfIssuer, shares, value, filingDate, CIK) VALUES (%s, %s, %s, %s, %s, %s)"
c.execute(sql,(cusip, nameOfIssuer, shares, value, filingDate,cik[a]))
db.commit()
print("finished with fund: " + fund_name)