Skip to content

Commit

Permalink
Merge pull request #25 from mepley1/save-raw-request-body
Browse files Browse the repository at this point in the history
Save raw request body + enable regex search
  • Loading branch information
mepley1 authored Dec 29, 2024
2 parents 5142335 + 0b87cc9 commit 86d90f4
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 12 deletions.
66 changes: 58 additions & 8 deletions project/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ def createDatabase(): # note: change column names to just match http headers, th
"requestmethod" TEXT CHECK(length("requestmethod") <= 8),
"querystring" TEXT,
"time" DATETIME CHECK(length("time") <= 1024),
"postjson" TEXT,
"body_raw" BLOB,
"body_processed" TEXT,
"headers" TEXT,
"headers_json" TEXT,
"url" TEXT,
Expand Down Expand Up @@ -140,7 +141,7 @@ def validate_id_numeric(_id):
return False

def validate_header_key(_hk):
""" Letters + hyphen. """
""" Validate an HTTP header name. Letters + hyphen. """
pattern = r'^[a-zA-Z\-_]+$'
if re.match(pattern, _hk):
return True
Expand Down Expand Up @@ -224,9 +225,11 @@ def index(u_path):
req_version = request.environ.get('SERVER_PROTOCOL') #http version
logging.info(f'{req_ip} {request} {req_version}')

#add to db schema later
req_args_j = json.dumps(request.args) #So I can have a jsonified version as well
#add to db schema later (unused right now)
#req_args_j = json.dumps(request.args) #So I can have a jsonified version as well

# Save body un-processed, so I have a consistent column.
req_body_raw = request.get_data()

# NEW SECTION: Get the POST request body
# Get the request body. Could be any content-type, format, encoding, etc, try to capture
Expand Down Expand Up @@ -303,14 +306,15 @@ def index(u_path):

# Request data to insert into the database
sql_query = """INSERT INTO bots
(id,remoteaddr,hostname,useragent,requestmethod,querystring,time,postjson,headers,headers_json,url,reported,contenttype,country,from_contact,scheme,host,path,referer)
VALUES (NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);"""
(id,remoteaddr,hostname,useragent,requestmethod,querystring,time,body_raw,body_processed,headers,headers_json,url,reported,contenttype,country,from_contact,scheme,host,path,referer)
VALUES (NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);"""
data_tuple = (req_ip,
req_hostname,
req_user_agent,
req_method,
req_query,
req_time,
req_body_raw,
req_body,
str(req_headers),
headers_json,
Expand Down Expand Up @@ -908,15 +912,15 @@ def queriesStats():
@main.route('/stats/body', methods = ['GET'])
@login_required
def bodyStats():
""" Get records matching the POST request body. """
""" Get records matching the request body. (Query body_processed column, stored as decoded text) """
body = unquote(request.args.get('body', ''))

with sqlite3.connect(requests_db) as conn:
conn.row_factory = sqlite3.Row
c = conn.cursor()
# Query for matching request body
sql_query = """
SELECT * FROM bots WHERE (postjson LIKE ?) ORDER BY id DESC;
SELECT * FROM bots WHERE (body_processed LIKE ?) ORDER BY id DESC;
"""
data_tuple = (body,)
c.execute(sql_query, data_tuple)
Expand Down Expand Up @@ -951,6 +955,49 @@ def bodyStats():
subtitle = f'{body}',
)

@main.route('/stats/body_raw', methods = ['GET'])
@login_required
def bodyRawStats():
    """ Get records whose raw request body matches a regex. (body_raw column, stored as blob)

    Query parameters:
        body: URL-quoted regular expression, applied through SQLite's REGEXP operator.
        page: 1-based page number; missing, non-numeric or < 1 values fall back to 1.
        per_page: rows per page; missing or non-numeric values fall back to 100,
            values < 1 are raised to 1.

    Returns the rendered 'stats.html' template holding the requested page of matches,
    most recent first.
    """
    body = unquote(request.args.get('body', ''))

    # The sqlite3 connection context manager only commits/rolls back, it never closes the
    # connection, so close explicitly in a finally block. That way the handle is released
    # even when the query fails (e.g. the REGEXP callback raising on a bad pattern).
    conn = sqlite3.connect(requests_db)
    try:
        conn.row_factory = sqlite3.Row
        # SQLite ships no REGEXP implementation; bind the operator to our helper.
        # NOTE(review): `regexp` is defined elsewhere in this module -- confirm it copes
        # with BLOB (bytes) values and with invalid user-supplied patterns.
        conn.create_function("REGEXP", 2, regexp)
        c = conn.cursor()
        # Query for matching request body, order by most recent.
        sql_query = '''SELECT * FROM bots WHERE body_raw REGEXP (?) ORDER BY id DESC;'''
        c.execute(sql_query, (body,))
        matching_rows = c.fetchall()
        c.close()
    finally:
        conn.close()

    #pagination
    # type=int makes Werkzeug return the default instead of raising on non-numeric input;
    # max(1, ...) guards against page <= 0 and a zero/negative page size (division by zero).
    page = max(1, request.args.get('page', 1, type=int))
    items_per_page = max(1, request.args.get('per_page', 100, type=int))
    total_items = len(matching_rows)
    total_pages = ceil(total_items / items_per_page)
    start_index = (page - 1) * items_per_page
    end_index = min(start_index + items_per_page, total_items)

    stats_on_page = matching_rows[start_index:end_index]

    # Remove the page# so we can add a new one to the pagination links
    args_for_pagination = request.args.to_dict()
    args_for_pagination.pop('page', None)

    return render_template('stats.html',
        stats = stats_on_page, #pagination
        page = page, #pagination
        total_pages = total_pages, #pagination
        args_for_pagination = args_for_pagination, #pagination
        totalHits = total_items,
        statName = "Request body like:",
        subtitle = f'{body}',
        )

@main.route('/stats/content-type', methods = ['GET'])
@login_required
def content_type_stats():
Expand Down Expand Up @@ -1616,6 +1663,9 @@ def parse_search_form():
body_string = query_text
body_string = '%' + body_string + '%'
return redirect(url_for('main.bodyStats', body = body_string))
elif chosen_query == 'body_raw':
q = query_text
return redirect(url_for('main.bodyRawStats', body = q))
elif chosen_query == 'hostname_endswith':
hostname_string = query_text.strip()
return redirect(url_for('main.hostname_stats', hostname = hostname_string))
Expand Down
3 changes: 2 additions & 1 deletion project/templates/search.html
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ <h2>Search for HTTP requests where:</h2>
<option value="header_key">Headers: Keys only</option>
<option value="content_type">Content-Type</option>
<option value="ua_string">User-agent</option>
<option value="body_string">Body</option>
<option value="body_string">Body (processed) - like</option>
<option value="body_raw">Body (raw) - regex</option>
<option value="any_field">Any field</option>
</optgroup>
</select>
Expand Down
11 changes: 8 additions & 3 deletions project/templates/stats.html
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,10 @@ <h2>Most recent requests matching query</h2>
<input type="checkbox" name="dataCheckbox" class="sv0" value="dataURL" id="cbURL">URL
</label>
<label>
<input type="checkbox" name="dataCheckbox" class="sv0" value="dataPostData" id="cbBody" checked>Body
<input type="checkbox" name="dataCheckbox" class="sv0" value="dataBodyBytes" id="cbBodyBytes">Body (raw)
</label>
<label>
<input type="checkbox" name="dataCheckbox" class="sv0" value="dataPostData" id="cbBody" checked>Body (processed)
</label>
<label>
<input type="checkbox" name="dataCheckbox" class="sv0" value="dataContentType" id="cbContentType">Content-Type
Expand Down Expand Up @@ -158,7 +161,8 @@ <h2>Most recent requests matching query</h2>
<th class="dataPath sv1">Path</th>
<th class="dataQueryString sv0">Query String</th>
<th class="dataURL hidden sv0">URL</th>
<th class="dataPostData sv0">Body</th>
<th class="dataBodyBytes hidden sv0">Body (raw)</th>
<th class="dataPostData sv0">Body (processed)</th>
<th class="dataContentType hidden sv0">Content-Type</th>
<th class="dataHostname sv0">Hostname</th>
<th class="dataTime sv0">Time</th>
Expand All @@ -181,7 +185,8 @@ <h2>Most recent requests matching query</h2>
<td class="dataPath dataToLink sv1"><a href="{{url_for('main.path_stats', path=row['path'])}}">{{ row['path'] }}</a></td>
<td class="dataQueryString dataToLink mono smaller sv0"><a href="{{url_for('main.queriesStats', query=row['querystring']|urlencode)}}">{{row['querystring']|e}}</a></td>
<td class="dataURL dataToLink hidden sv0"><a href="{{url_for('main.urlStats', url=row['url'])}}">{{row['url']|e}}</a></td>
<td class="dataPostData dataToLink mono smaller sv0"><a href="{{url_for('main.bodyStats', body=row['postjson']|quote_plus)}}">{{row['postjson']|e}}</a></td>
<td class="dataBodyBytes dataToLink mono smaller hidden sv0"><a href="{{url_for('main.bodyRawStats', body=row['body_raw'].decode(errors='replace')|urlencode)}}">{{row['body_raw'].decode(errors='replace')|e}}</a></td>
<td class="dataPostData dataToLink mono smaller sv0"><a href="{{url_for('main.bodyStats', body=row['body_processed']|quote_plus)}}">{{row['body_processed']|e}}</a></td>
<td class="dataContentType dataToLink hidden sv0"><a href="{{url_for('main.content_type_stats', ct=row['contenttype']) }}">{{row['contenttype']|e}}</a></td>
<td class="dataHostname dataToLink sv0"><a href="{{url_for('main.hostname_stats', hostname=row['hostname'])}}">{{row['hostname']}}</a></td>
<td class="dataTime dataToLink sv0"><a href="{{url_for('main.date_stats', date=row['time'], accuracy=16)}}">{{row['time']}}</a></td>
Expand Down

0 comments on commit 86d90f4

Please sign in to comment.