# dbconnect.py
import urllib.parse # urljoin
import sqlalchemy
# requires pymysql so keep that installed
import pandas as pd
import os, json, time
import commonfuncs as cf
root = os.path.dirname(__file__) # needed for tornado
absPath = os.path.realpath(__file__)
#############################
# initiate DB connection
creds = {"host": os.environ.get('MYSQL_SERVER'),
"db": os.environ.get('MYSQL_DB'),
"uid": os.environ.get('MYSQL_UID'),
"pw": os.environ.get('MYSQL_PW')
}
tsql1 = time.time()
conn_str = f"mysql+pymysql://{creds['uid']}:{creds['pw']}@{creds['host']}/{creds['db']}"
sqlEngine = sqlalchemy.create_engine(conn_str,echo=False, pool_recycle=1)
tsql2 = time.time()
cf.logmessage(f"Connected to SQL in {round(tsql2-tsql1,3)} secs")
#############################
# SQL security
def sqlSecurty(x, inputTerm=False):
    sqlBlacklist = [';', '(select', '-->', '<--', '<!--', 'sleep(', '#exec']
    sql_inputs_Blacklist = sqlBlacklist + list(""""'`\/^*()!#""") # 2021-06-17 allowing space
    if isinstance(x, (int, float)):
        # it's a number, let it pass
        return True
    if not isinstance(x, str):
        # not a number and not a string either? Suspicious!
        cf.logmessage("not a number and not a string either? Suspicious!")
        return False
    if inputTerm:
        # stricter check for input values: ids, tablenames etc
        if any([f in x for f in sql_inputs_Blacklist]):
            return False
    else:
        if any([f in x for f in sqlBlacklist]):
            return False
    return True
def sqlSecurity_payload(payload):
    for key in payload.keys():
        if not sqlSecurty(key, inputTerm=True):
            cf.logmessage("Suspicious key found in payload json")
            return False
        if not sqlSecurty(payload[key], inputTerm=True):
            cf.logmessage("Suspicious value found in payload json")
            return False
    return True
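
# Illustrative checks (the table and field names below are hypothetical, only to show
# how the two helpers above behave):
#   sqlSecurty("select count(*) from users where id='A1'")        # -> True (nothing blacklisted)
#   sqlSecurty("id; drop table users", inputTerm=True)            # -> False (';' is blacklisted)
#   sqlSecurity_payload({"username": "asha", "route_id": "R1"})   # -> True (keys and values pass the strict check)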
#############################
# Generic SQL query handling functions
def makeQuery(s1, output='oneValue', lowerCaseColumns=False, keepCols=True, fillna=True):
    '''
    output choices:
    oneValue : ex: select count(*) from table1 (output will be only one value)
    oneRow   : ex: select * from table1 where id='A' (output will be only one row)
    df       : ex: select * from users (output will be a table)
    list     : json array, like df.to_dict(orient='records')
    column   : first column in output as a list. ex: select username from users
    oneJson  : first row, as dict
    '''
    if not isinstance(s1, str):
        cf.logmessage("query needs to be a string")
        return False
    if ';' in s1:
        cf.logmessage("; not allowed")
        return False
    # if len(s1) > 100:
    #     cf.logmessage("Querying SQL: {}...".format(s1.strip().replace('\n',' ')[:100]))
    # else:
    #     cf.logmessage("Querying SQL:",s1.strip().replace('\n',' '))
    # # keeping auth check and some other queries out
    # skipPrint = ['where token=', '.STArea()', 'STGeomFromText']
    # if not any([(x in s1) for x in skipPrint]) :
    #     cf.logmessage("Querying SQL:",s1.strip().replace('\n',' '))
    # else:
    #     cf.logmessage("Querying SQL: {}...".format(s1.strip().replace('\n',' ')[:20]))
    global sqlEngine
    c = sqlEngine.connect()
    if output == 'oneValue':
        result = c.execute(s1).fetchone()
        c.close()
        if not result: return None
        return result[0]
    elif output == 'oneRow':
        result = c.execute(s1).fetchone()
        c.close()
        return result
    elif output in ['df','list','oneJson','column']:
        if fillna:
            df = pd.read_sql_query(s1, con=c, coerce_float=False).fillna('')
        else:
            df = pd.read_sql_query(s1, con=c, coerce_float=False)
        # coerce_float : need to ensure mobile numbers aren't mangled
        c.close()
        # make all column headers lowercase
        # if lowerCaseColumns: df.columns = [x.lower() for x in df.columns] # from https://stackoverflow.com/questions/19726029/how-can-i-make-pandas-dataframe-column-headers-all-lowercase
        if output == 'df':
            if (not len(df)) and (not keepCols): return []
            else: return df
        if not len(df): return []
        if output == 'column':
            return df.iloc[:,0].tolist() # .iloc[:,0] -> first column
        elif output == 'list':
            return df.to_dict(orient='records')
        elif output == 'oneJson':
            return df.to_dict(orient='records')[0]
        else:
            # default - df
            return df
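
# Illustrative calls of makeQuery() (table and column names are hypothetical):
#   n     = makeQuery("select count(*) from users", output='oneValue')              # single scalar
#   user  = makeQuery("select * from users where username='asha'", output='oneJson') # first row as dict
#   names = makeQuery("select username from users", output='column')                # first column as list
#   df    = makeQuery("select * from users", output='df')                           # full pandas DataFrame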
def execSQL(s1):
    if not isinstance(s1, str):
        cf.logmessage("query needs to be a string")
        return False
    if ';' in s1:
        cf.logmessage("; not allowed")
        return False
    if len(s1) > 100:
        cf.logmessage("Executing SQL: {}...".format(s1.strip().replace('\n',' ')[:100]))
    else:
        cf.logmessage("Executing SQL:", s1.strip().replace('\n',' '))
    # # keeping auth check and some other queries out
    # skipPrint = ['set token=', 'polyline6', 'STGeomFromText']
    # if not any([(x in s1) for x in skipPrint]) :
    #     cf.logmessage("Executing SQL:",s1.strip().replace('\n',' '))
    # else:
    #     cf.logmessage("Executing SQL: {}...".format(s1.strip().replace('\n',' ')[:20]))
    global sqlEngine
    try:
        c = sqlEngine.connect()
        c.execute(s1)
        c.close()
        return True
    except sqlalchemy.exc.IntegrityError as e:
        cf.logmessage("This entry already exists, skipping.")
        cf.logmessage(e)
        return False
    except:
        cf.logmessage('WARNING: could not execute query.')
        raise
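
# Illustrative call of execSQL() (table and values are hypothetical); returns True on
# success and False if the statement hits a duplicate-key IntegrityError:
#   execSQL("update users set status='active' where username='asha'")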
##### brought in from Sourcing Workflow application
def getColumnsList(tablename, engine):
    statement1 = f"""
        SELECT COLUMN_NAME
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_NAME = N'{tablename}'
    """
    # sql = db.text(statement1)
    df = pd.read_sql_query(statement1, con=engine)
    # return df['COLUMN_NAME'].str.lower().to_list()
    return df['COLUMN_NAME'].to_list()
def addRow(params, tablename):
    df = pd.DataFrame([params])
    return addTable(df, tablename) # hack: reuse the bulk-insert path for a single row
def addTable(df, tablename):
    global sqlEngine
    c = sqlEngine.connect()
    table_cols = getColumnsList(tablename, c)
    # df.columns = [x.lower() for x in df.columns] # make lowercase
    sending_cols = [x for x in table_cols if x in df.columns]
    discarded_cols = set(df.columns.to_list()) - set(table_cols)
    if len(discarded_cols): cf.logmessage("Dropping {} cols from uploaded data as they're not in the DB: {}".format(len(discarded_cols), ', '.join(discarded_cols)))
    # ensure only those values go to the DB table that have columns there
    # cf.logmessage("Adding {} rows into {} with these columns: {}".format(len(df), tablename, sending_cols))
    try:
        df[sending_cols].to_sql(name=tablename, con=c, if_exists='append', index=False)
        c.close()
    except:
        c.close()
        return False
    return True
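
# Illustrative usage (table and columns are hypothetical): addRow() wraps a dict in a
# one-row DataFrame and hands it to addTable(), which appends to the DB table and
# silently drops any columns not present in that table.
#   addRow({"username": "asha", "status": "active"}, "users")
#   addTable(pd.DataFrame([{"username": "asha"}, {"username": "ravi"}]), "users")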