-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbreathmint.py
579 lines (561 loc) · 23.2 KB
/
breathmint.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
#!/usr/bin/python3
'''
breathmint
Burp Issues XML Parser

typical workflow:
    0. find_burp_output(directory) -> returns a list of files ("burp_file_list")
    1. parse_files(file_list=burp_file_list) -> returns a list of all issues ("all_issues")
        uses make_me_pretty to clean up output content
    2. create_output - using excelsify
        a. create_worksheet_data() for each worksheet you wish to include in the workbook
            i. make a list of the column names to be included in the worksheet (output_column_names)
            ii. create_worksheet_data(output_column_names=output_column_names, issue_data_list=all_issues) -> returns a dictionary; store it in "excelsify_worksheet_data" under the worksheet name
        b. excelsify.create_workbook(worksheet_data=excelsify_worksheet_data, out_filename=excelsify_workbook_name)
'''
#
#
# -- import public modules --
#
#
import argparse
import base64
import datetime
import html
import os
import re
import sys
import traceback
import unicodedata
import xml.etree.ElementTree as ET
#
#
# -- import private modules --
#
#
import excelsify
import make_me_pretty
#
#
# -- Global variables --
#
#
RISK_VALUES = ["Critical", "High", "Medium", "Low", "Informational"]
RISK_SYNONYM_MAPPING = {'None':"Informational", 'Info':"Informational", 'Information':"Informational", 'Moderate':"Medium"}
SORT_ORDER_RISK = {"Critical":0, "High":1, "Medium":2, "Low":3, "Informational":4}
#
#
# -- Function declarations --
#
#
#
#
# breathmint_logo
#
# tis a silly little function to return a logo as a string
#
#
def breathmint_logo():
    """Return the breathmint ASCII-art banner followed by the author credit.

    Returns:
        The banner as one newline-terminated string, or the plain text
        'breathmint' if assembling the banner fails.
    """
    try:
        # NOTE(review): the art's horizontal padding looks collapsed in this
        # copy of the file - confirm the literals against upstream before
        # adjusting them.
        banner_rows = (
            " /||-----||/\n",
            " |-------------|/\n",
            " -----------------|\n",
            " |------------------/\n",
            " //////////|--------|\n",
            ".......--------------------------------------| ////\n",
            "_......-------------------------------------/ /||--------||\n",
            "-......------------------------------------/ |--------------|\n",
            "/......----------------------------------/ ------------------\n",
            " --------------------------||||||||||||/ |------------------|\n",
            " ___________________________________________//////////|-------|\n",
            " /........----------------------------------------------------|\n",
            " /.......----------------------------------------------------/\n",
            " /_.....--------------------------------------------------/\n",
            " -....------------------------------------------------|/\n",
            " --------------------------------------------------//\n",
            " ______________________________________\n",
            " /-..._________------------------------|\n",
            " |-_________--------------------------/\n",
            " /|-_____---------------------------/\n",
            " /||---------------------------|\n",
            " //////////|--------|\n",
            " |------------------/\n",
            " -----------------|\n",
            " |-------------|/\n",
            " /||-----||/\n",
            "\n\nby FYRM Associates\n",
        )
        # every row already carries its own newline, so join with nothing
        return "".join(banner_rows)
    except Exception as e:
        print('\n==== Exception ====\n breathmint.breathmintLogo()\n----')
        print(e)
        traceback.print_exc()
        print('\n===================')
        print('(this is really embarrassing)')
        return 'breathmint'
#
#
# find_burp_output
#
# find all Burp output files in the given directory
#
#
def find_burp_output(directory):
    """Find the candidate Burp XML export files in *directory*.

    A directory entry is selected when its name ends with ".xml" and does
    not start with "~$" (presumably temporary/lock files - the code only
    checks the prefix).

    Parameters:
        directory - path of the directory to scan (not recursive)

    Returns:
        A sorted list of paths, each joined with *directory* so that it can
        be opened from any working directory (e.g. passed straight to
        parse_files). An empty list is returned if the directory cannot be
        listed; the error is printed rather than raised.
    """
    try:
        files = []
        # sorted() makes the processing order deterministic; os.listdir
        # returns entries in arbitrary order
        for entry in sorted(os.listdir(directory)):
            if entry.endswith(".xml") and not entry.startswith("~$"):
                print('Found ' + entry)
                # os.listdir yields bare names; without the directory prefix
                # the file can only be opened when the cwd is *directory*
                files.append(os.path.join(directory, entry))
        return files
    except Exception as e:
        print('\n==== Exception ====\n breathmint.find_burp_output()\n----')
        print(e)
        traceback.print_exc()
        print("===================")
        return []
#
#
# parse_atags_in_html_string
#
# get the url and displaytext from the html string
# useful for the references and classification content in Burp XML output
#
# the references item is a single string containing HTML tags: <ul>, <li>, <a>
# e.g.
# <ul>
# <li><a href="https://developer.mozilla.org/en-US/docs/Web/Security/HTTP_strict_transport_security">HTTP Strict Transport Security</a></li>
# <li><a href="http://www.thoughtcrime.org/software/sslstrip/">sslstrip</a></li>
# <li><a href="https://hstspreload.appspot.com/">HSTS Preload Form</a></li>
# </ul>
# e.g.
# <ul><li><a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/X-Frame-Options">X-Frame-Options</a></li></ul>
#
# parameters:
# html_string - something that looks like the examples above
#
# returns:
# [{'url':"<<url_0>>", 'displaytext':"<<displaytext_0>>"}, ..., {'url':"<<url_n>>", 'displaytext':"<<displaytext_n>>"}]
#
#
# Matches one <a ... href="URL" ...>display text</a> element.
#   group 1 - the quote character used around the href value (' or ")
#   group 2 - the href value
#   group 3 - everything between the opening tag and the closing </a>
_ATAG_PATTERN = re.compile(
    r'<a\s+(?:[^>]*?\s+)?href\s*=\s*(["\'])(.*?)\1[^>]*>(.*?)</a\s*>',
    re.IGNORECASE | re.DOTALL,
)


def parse_atags_in_html_string(html_string):
    """Extract the URL and display text of every <a> tag in an HTML string.

    Only the anchors themselves are matched, so surrounding <ul>/<li>
    markup, text outside the anchors, and attributes other than href are
    ignored. The href value may be single- or double-quoted.

    Parameters:
        html_string - HTML fragment such as
            '<ul><li><a href="https://example.org/">Example</a></li></ul>';
            None or an empty string yields an empty list

    Returns:
        [{'url': "<<url_0>>", 'displaytext': "<<displaytext_0>>"}, ...]
        in document order, with both values stripped of surrounding
        whitespace.
    """
    retval = []
    # an XML element without text content hands us None; nothing to parse
    if not html_string:
        return retval
    try:
        for atag in _ATAG_PATTERN.finditer(html_string):
            retval.append({
                'url': atag.group(2).strip(),
                'displaytext': atag.group(3).strip(),
            })
    except TypeError as e:
        # raised when html_string is not a str; report it in the same way
        # as the other functions in this file and return what we have
        print('\n==== Exception ====\n breathmint.parse_atags_in_html_string()\n----')
        print(e)
        traceback.print_exc()
        print('\n===================')
    return retval
#
#
# parse_files
#
# parse the given file list
#
# parameters:
# file_list - list of files; output from find_burp_output function
# risk_excluded - specifically excluded risk ratings
# will check for both the Burp "severity" value and the mapped values in RISK_SYNONYM_MAPPING[severity]
# will continue to next issue if the current issue has a matching risk value
# risk_included - specifically included risk ratings
# if empty list then all will be included
#
# returns:
# all_issues - see the comment in the __main__ function for details
#
#
# Default port for the URI schemes handled explicitly; used when the Burp
# <host> value does not spell the port out.
_DEFAULT_PORT_BY_SCHEME = {"http": "80", "https": "443"}


def _prettify_burp_text(text, drop_pattern=None):
    """Normalize (NFKD) and clean up a Burp text field; None/empty gives ""."""
    if not text:
        return ""
    text = unicodedata.normalize("NFKD", text)
    if drop_pattern is not None:
        text = re.sub(drop_pattern, '', text)
    text = make_me_pretty.fix_spacing_issues(contents=text)
    return make_me_pretty.remove_lxml_markup(contents=text)


def _urls_in_field(issue, tag):
    """Return the href URLs found in the HTML text of the child element *tag*."""
    markup = issue.findtext(tag)
    if not markup:
        return []
    # only the URLs are kept, not the display text
    return [atag['url'] for atag in parse_atags_in_html_string(html_string=markup)]


def _split_host_uri(uri):
    """Split a Burp <host> value (e.g. "https://www.example.org:8443") into (protocol, fqdn, port)."""
    protocol, fqdn, port = "https", "", "443"
    scheme, separator, remainder = (uri or "").partition("://")
    if not separator or not scheme:
        # not in scheme://host form; keep the defaults
        return protocol, fqdn, port
    protocol = scheme.lower()
    # drop anything after the authority part, then look for a trailing :port
    authority = remainder.split("/", 1)[0]
    host, colon, explicit_port = authority.rpartition(":")
    if colon and explicit_port.isdigit():
        return protocol, host, explicit_port
    fqdn = authority
    if protocol in _DEFAULT_PORT_BY_SCHEME:
        port = _DEFAULT_PORT_BY_SCHEME[protocol]
    else:
        # unknown scheme without an explicit port: the port stays at "443"
        print("TODO: add default port number assignment to the code; protocol observed:", scheme)
    return protocol, fqdn, port


def _http_message_as_base64(element):
    """Return the text of a <request>/<response> element as a base64 string."""
    body = element.text or ""
    if element.get('base64') == "true":
        # already base64-encoded in the export
        return body
    # b64encode returns bytes; decode so both branches yield str
    return base64.b64encode(body.encode('utf-8', 'ignore')).decode('ascii')


def _collect_request_response(issue):
    """Build the {'0': {'request': ..., 'response': ...}, ...} mapping for an issue."""
    collected = {}
    for pair in issue.findall('requestresponse'):
        requests = list(pair.iter('request'))
        responses = list(pair.iter('response'))
        # pair the n-th request with the n-th response; either may be missing
        for position in range(max(len(requests), len(responses))):
            entry = {}
            if position < len(requests):
                entry['request'] = _http_message_as_base64(requests[position])
            if position < len(responses):
                entry['response'] = _http_message_as_base64(responses[position])
            collected[str(len(collected))] = entry
    return collected


def _parse_issue(issue, fallback_serial, risk_excluded, risk_included):
    """Convert one <issue> element to an issue dictionary, or None if it is filtered out."""
    #
    # Modifiable issue data mapping (breathmint <-> burp.xml):
    # severity <-> severity
    # confidence <-> confidence
    #
    severity = issue.findtext('severity', default="")
    risk = RISK_SYNONYM_MAPPING.get(severity, severity)
    if risk not in SORT_ORDER_RISK:
        print("ERROR: unexpected risk (" + risk + ")")
    # filters match either the mapped risk or the raw Burp severity
    if risk in risk_excluded or severity in risk_excluded:
        return None
    if risk_included and risk not in risk_included and severity not in risk_included:
        return None
    confidence = issue.findtext('confidence', default="")
    #
    # Generic issue data mapping (breathmint <-> burp.xml):
    # serial_number <-> serialNumber
    # background <-> issueBackground
    # remediation <-> remediationBackground (+ remediationDetail)
    # references <-> references
    # classification <-> vulnerabilityClassifications
    #
    serial_number = issue.findtext('serialNumber', default=fallback_serial)
    name = issue.findtext('name', default="")
    background = _prettify_burp_text(issue.findtext('issueBackground'))
    remediation = _prettify_burp_text(issue.findtext('remediationBackground'))
    remediation_detail = _prettify_burp_text(issue.findtext('remediationDetail'))
    # "Enter Remediation Detail..." is skipped, presumably because it is
    # unfilled template text
    if remediation_detail and remediation_detail != "Enter Remediation Detail...":
        remediation += "\n" + remediation_detail
    references = _urls_in_field(issue, 'references')
    classification = _urls_in_field(issue, 'vulnerabilityClassifications')
    #
    # Target data mapping (breathmint <-> burp.xml):
    # ip <-> host ip
    # uri <-> host
    # port <-> None (port is determined using uri value)
    # path <-> path
    # location <-> location
    #
    # note: ip and uri is in the <host> tag with the following format:
    # <host ip="10.1.2.3">https://www.example.org</host>
    #
    host = issue.find('host')
    ip = host.get('ip') if host is not None else None
    uri = (host.text or "") if host is not None else ""
    protocol, fqdn, port = _split_host_uri(uri)
    path = issue.findtext('path', default="")
    location = issue.findtext('location', default="")
    # sometimes burp results put the same value in path and location, in
    # which case it seems like location is really just the path
    if location == path:
        location = ""
    #
    # Additional details data mapping (breathmint <-> burp.xml):
    # target_details <-> issueDetailItems
    # issue_details <-> issueDetail + issueDetailItems (all text, newline separated)
    # requestresponse <-> requestresponse
    #
    items_parent = issue.find('issueDetailItems')
    detail_items = [] if items_parent is None else list(items_parent.iter('issueDetailItem'))
    target_details = [item.text for item in detail_items]
    # NOTE(review): as written, the ' ' pattern removes every single space
    # from the issue detail; the literal may have lost characters in
    # transit - confirm against upstream before changing it.
    issue_details = _prettify_burp_text(issue.findtext('issueDetail'), drop_pattern=' ')
    for item in detail_items:
        if item.text:
            issue_details += "\n" + _prettify_burp_text(item.text)
    #
    # some burp extensions do not populate the background, remediation, and
    # other fields properly and instead throw everything into 'issueDetail'
    #
    if background == "":
        background = issue_details
    #
    # user-friendly field names as keys
    return {
        'Serial Number': serial_number,
        'Vulnerability Name': name,
        'Background': background,
        # might be fun to determine a product name for common apps, but that
        # is for another day; just a placeholder for now
        'Product Name': "",
        'Remediation': remediation,
        'References': references,
        'Classification': classification,
        'Risk': risk,
        'Severity': severity,
        'Confidence': confidence,
        'IP': ip,
        'URI': uri,
        'FQDN': fqdn,
        'Port': port,
        'Protocol': protocol,
        'Path': path,
        'Location': location,
        'Target Details': target_details,
        'Issue Details': issue_details,
        'Request Response': _collect_request_response(issue),
    }


def parse_files(file_list, risk_excluded=None, risk_included=None):
    """Parse Burp issue export files into a list of issue dictionaries.

    Parameters:
        file_list - paths of the XML files to parse (e.g. the output of
            find_burp_output)
        risk_excluded - risk ratings to leave out; an issue is skipped when
            either its Burp severity or its mapped risk is listed
            (default: nothing excluded)
        risk_included - risk ratings to keep; when empty or None every
            issue not excluded is kept

    Returns:
        A list with one dictionary per issue, keyed by 'Serial Number',
        'Vulnerability Name', 'Background', 'Product Name', 'Remediation',
        'References', 'Classification', 'Risk', 'Severity', 'Confidence',
        'IP', 'URI', 'FQDN', 'Port', 'Protocol', 'Path', 'Location',
        'Target Details', 'Issue Details' and 'Request Response', sorted by
        SORT_ORDER_RISK with unexpected risk values last.

    Nothing is raised to the caller: a file that is not a Burp export is
    skipped with a warning, and an exception while parsing a file is
    printed and ends processing of that file only (issues already read
    from it are kept).
    """
    risk_excluded = risk_excluded or []
    risk_included = risk_included or []
    all_issues = []
    try:
        print("<< Parsing Burp files >>")
        for burp_file in file_list:
            print("Parsing: " + str(burp_file) + "\n...")
            try:
                root = ET.parse(burp_file).getroot()
                #
                # "maximum effort" to verify this is actually a Burp xml file
                # - Deadpool
                if not root.get('burpVersion'):
                    print("Warning: parse_files: the file \"" + str(burp_file) + "\" does not appear to be a Burp xml issue export file")
                    continue
                # counts the issues kept so far from this file; doubles as
                # the serial number of an issue that has no <serialNumber>
                issue_count = 0
                for issue in root.findall('issue'):
                    new_issue = _parse_issue(issue, str(issue_count), risk_excluded, risk_included)
                    if new_issue is None:
                        continue
                    all_issues.append(new_issue)
                    issue_count += 1
            except Exception as e:
                print("===================")
                print("\nERROR: breathmint.parse_files: Exception thrown when parsing file: ", str(burp_file))
                print(e)
                traceback.print_exc()
                print("\n\t moving on to next file")
                print("===================")
            print("Finished: " + str(burp_file))
        print("<< Finished parsing Burp files >>")
        # unexpected risk values have no sort rank; place them after the known ones
        all_issues.sort(key=lambda entry: SORT_ORDER_RISK.get(entry['Risk'], len(SORT_ORDER_RISK)))
    except Exception as e:
        print('\n==== Exception ====\n breathmint.parse_files()\n----')
        print(e)
        traceback.print_exc()
        print('\n===================')
    return all_issues
#
#
# -- Main program execution --
#
#
def _strip_report_extension(filename, extensions=(".xml", ".xlsx", ".docx")):
    """Return *filename* without its trailing extension if it is one of *extensions*."""
    for extension in extensions:
        if filename.endswith(extension):
            # cut the suffix only; str.replace would also remove the same
            # text from the middle of the name or from a directory part
            return filename[:-len(extension)]
    return filename


def _expand_risk_argument(raw_value, label):
    """Turn a comma separated '-e'/'-i' value into a list of RISK_VALUES entries.

    A token selects a risk value when it equals it exactly or is a
    case-insensitive prefix of it. *label* ("excluded"/"included") is only
    used in the progress messages.
    """
    selected = []
    for token in raw_value.split(','):
        token = token.strip()
        if not token:
            # e.g. a trailing comma; an empty prefix would match every value
            continue
        if token in RISK_VALUES:
            matches = [token]
        else:
            folded = token.casefold()
            matches = [value for value in RISK_VALUES if value.casefold().startswith(folded)]
        if not matches:
            print("Warning: ignoring unrecognized risk value:", token)
        for value in matches:
            if value not in selected:
                selected.append(value)
                print("new " + label + " risk added:", value)
    return selected


def main():
    """Command line entry point: find the Burp files, parse them, write the workbook.

    Returns:
        0 when the workbook was generated, 1 when any step reported an error
        or there was nothing to process.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", help="Location of the directory in which the Burp issues XML files are stored (all .xml files will be opened).")
    parser.add_argument("-f", help="Name of the single Burp file you want to parse. Ignored if '-d' option is used.")
    parser.add_argument("-e", help="('-e <comma,separated,list>') List of risk ratings to exclude from output; partial starting characters accepted; no spaces (default == none excluded).")
    parser.add_argument("-i", help="('-i <comma,separated,list>') List of severity ratings to include in output; partial starting characters accepted; no spaces (default == include all).")
    parser.add_argument("-o", help="Base name of output file(s) to which you want the parsed results to be written.")
    args = parser.parse_args()
    print("\n\n" + breathmint_logo() + "\n\nRunning breathmint\n...\n")
    print("<< Finding Burp output files >>")
    file_list = []
    try:
        if args.d:
            file_list = find_burp_output(args.d)
        elif args.f:
            file_list.append(args.f)
        else:
            print("No directory or file argument provided. Trying current directory.")
            file_list = find_burp_output('.')
    except Exception as e:
        print("===================")
        print("\nERROR: breathmint.__main__: Something went wrong when trying to get .xml file(s). This is not my fault. You failed miserably and should feel bad.")
        print(e)
        traceback.print_exc()
        print("===================")
        return 1
    print("<< Finished finding Burp output files >>")
    if not file_list:
        print("\nNote: input file list is empty; that's bad. But also we're done. That's good. Better luck next time. The sprinkles are also cursed.")
        return 1
    output_filename_base = "burp-output"
    try:
        if args.o:
            output_filename_base = _strip_report_extension(str(args.o))
        elif len(file_list) == 1:
            # no -o given: name the output after the single input file
            output_filename_base = _strip_report_extension(file_list[0], extensions=(".xml",))
    except Exception as e:
        print("===================")
        print("\nERROR: breathmint.__main__: Something went wrong when trying to get output filename base:")
        print(e)
        traceback.print_exc()
        print("===================")
        return 1
    risk_excluded = []
    risk_included = []
    try:
        if args.e:
            risk_excluded = _expand_risk_argument(args.e, "excluded")
        if args.i:
            risk_included = _expand_risk_argument(args.i, "included")
    except Exception as e:
        print("===================")
        print("\nERROR: breathmint.__main__: Something went wrong when trying to get excluded/included risks:")
        print(e)
        traceback.print_exc()
        print("===================")
        return 1
    #
    # all_issues format (as built by parse_files):
    # [
    #   {
    #     'Serial Number':"<<serialNumber>>",
    #     'Vulnerability Name':"<<name>>",
    #     'Background':"<<issueBackground>>",
    #     'Product Name':"",
    #     'Remediation':"<<remediationBackground>>",
    #     'References':["<<reference_0_url>>", ..., "<<reference_n_url>>"],
    #     'Classification':["<<classification_0_url>>", ..., "<<classification_n_url>>"],
    #     'Risk':"<<risk>> == severity | RISK_SYNONYM_MAPPING[severity]", (trying to enforce a common set of risk ratings)
    #     'Severity':"<<severity>>",
    #     'Confidence':"<<confidence>>",
    #     'IP':"<<host ip>>",
    #     'URI':"<<host>>",
    #     'FQDN':"<<extracted_from_URI>>",
    #     'Port':"<<port>>",
    #     'Protocol':"<<http|https>>", (probably http or https)
    #     'Path':"<<path>>",
    #     'Location':"<<location>>",
    #     'Target Details':["<<issueDetailItem_0>>", ..., "<<issueDetailItem_n>>"],
    #     'Issue Details':"<<issueDetail>>",
    #     'Request Response':{
    #       '0':{ 'request':"<<base64(request)>>", 'response':"<<base64(response)>>" }, ...,
    #       'n':{ 'request':"<<base64(request)>>", 'response':"<<base64(response)>>" }
    #     }
    #   },
    #   ...,
    #   {'Serial Number':"<<serialNumber>>", ..., 'Request Response':{}}
    # ]
    #
    status = 0
    try:
        all_issues = parse_files(file_list=file_list, risk_excluded=risk_excluded, risk_included=risk_included)
        if not all_issues:
            print("ERROR: breathmint.__main__: parse_files returned a blank result")
            return 1
        print("<< Generating output files >>")
        #
        # assuming files were parsed correctly, we can make the output
        excelsify_workbook_name = output_filename_base + "--parsed--" + datetime.datetime.now().strftime('%Y%m%d_%H%M') + ".xlsx"
        print("Generating:", excelsify_workbook_name)
        #
        # pick your preferred columns in the order you want them to be placed in the output
        # column names must match the keys in each issue dictionary in the all_issues list
        output_column_names = ["Vulnerability Name", "Background", "Remediation", "References", "Classification", "Risk", "Confidence", "URI", "Path", "Location", "Target Details", "Issue Details"]
        excelsify_worksheet_data = {}
        ws = excelsify.create_worksheet_data(output_column_names=output_column_names, issue_data_list=all_issues)
        if ws == {}:
            print("ERROR: create_worksheet_data returned a blank dictionary")
            status = 1
        else:
            excelsify_worksheet_data['Burp Issues'] = ws
            success = excelsify.create_workbook(worksheet_data=excelsify_worksheet_data, out_filename=excelsify_workbook_name)
            # explicit comparison kept: the exact return type of
            # create_workbook is not visible from this file
            if success == True:
                print("...\nFinished:", excelsify_workbook_name)
            else:
                print("ERROR: Failed to generate excel output file:", excelsify_workbook_name)
                status = 1
        print("<< Finished generating output files >>")
    except Exception as e:
        print("===================")
        print("\nERROR: breathmint.__main__: Exception thrown in main execution:")
        print(e)
        traceback.print_exc()
        print("===================")
        return 1
    return status


if __name__ == '__main__':
    # a non-zero exit status tells the calling shell/script that the run failed
    sys.exit(main())