-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
004a6da
commit 3097410
Showing 2 changed files with 81 additions and 54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,16 @@ | ||
#!/usr/bin/python3 | ||
'''A script that generates random HTTP request logs. | ||
''' | ||
"""Log Parsing""" | ||
import random | ||
import sys | ||
import datetime | ||
from time import sleep | ||
|
||
import datetime | ||
|
||
# Emit 10,000 synthetic access-log lines, pausing a random sub-second
# interval before each one.
for _ in range(10000):
    sleep(random.random())
    sys.stdout.write(
        "{:d}.{:d}.{:d}.{:d} - [{}] \"GET /projects/260 HTTP/1.1\" {} {}\n".format(
            random.randint(1, 255),
            random.randint(1, 255),
            random.randint(1, 255),
            random.randint(1, 255),
            datetime.datetime.now(),
            random.choice([200, 301, 400, 401, 403, 404, 405, 500]),
            random.randint(1, 1024),
        )
    )
    # Flush after every line so each entry is written out immediately
    # instead of waiting in the stdout buffer.
    sys.stdout.flush()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,53 +1,87 @@ | ||
#!/usr/bin/env python3 | ||
"""Log Parsing""" | ||
#!/usr/bin/python3 | ||
"""A script for parsing HTTP request logs.""" | ||
import re | ||
|
||
import sys | ||
|
||
def print_stats(total_file_size, status_codes):
    """Print the accumulated file size and per-status-code counts.

    Args:
        total_file_size: Sum of the file sizes seen so far.
        status_codes: Mapping of status code to the number of times it
            has been seen.

    Status codes are printed in ascending order. Codes whose count is
    still zero are skipped, matching ``print_statistics`` in this file.
    """
    print("File size:", total_file_size)
    for code in sorted(status_codes):
        count = status_codes[code]
        if count:
            print(f"{code}: {count}")
|
||
def main(): | ||
"""Main function""" | ||
total_file_size = 0 | ||
status_codes = { | ||
200: 0, | ||
301: 0, | ||
400: 0, | ||
401: 0, | ||
403: 0, | ||
404: 0, | ||
405: 0, | ||
500: 0 | ||
# Pattern for one log line:
#   <ip> - [<date>] "<request>" <status_code> <file_size>
# Compiled once at import time instead of being rebuilt on every call.
_LOG_LINE_RE = re.compile(
    r'\s*(?P<ip>\S+)\s*-'
    # The fractional part is optional: str(datetime) omits it when the
    # microsecond field is exactly 0.
    r'\s*\[(?P<date>\d+-\d+-\d+ \d+:\d+:\d+(?:\.\d+)?)\]'
    r'\s*"(?P<request>[^"]*)"'
    r'\s*(?P<status_code>\S+)'
    # At least one whitespace character is required here; with `\s*` a
    # line ending in "2005" would backtrack into status "200", size 5.
    r'\s+(?P<file_size>\d+)\s*'
)


def extract_input(input_line):
    '''Extracts sections of a line of an HTTP request log.

    Args:
        input_line (str): One line of the log, with or without its
            trailing newline.

    Returns:
        dict: ``{'status_code': ..., 'file_size': ...}``. For a line that
        matches the expected format, ``status_code`` is the matched text
        (a str) and ``file_size`` is an int. For any other line both
        values are the int ``0``.
    '''
    info = {
        'status_code': 0,
        'file_size': 0,
    }
    resp_match = _LOG_LINE_RE.fullmatch(input_line)
    if resp_match is not None:
        info['status_code'] = resp_match.group('status_code')
        info['file_size'] = int(resp_match.group('file_size'))
    return info
|
||
try: | ||
for idx, line in enumerate(sys.stdin, start=1): | ||
line = line.strip() | ||
parts = line.split() | ||
|
||
if len(parts) != 7: | ||
continue | ||
def print_statistics(total_file_size, status_codes_stats):
    '''Prints the accumulated statistics of the HTTP request log.

    Args:
        total_file_size (int): Sum of the file sizes seen so far.
        status_codes_stats (dict): Mapping of status code to the number
            of times it has been seen.

    Status codes are printed in sorted key order, and only those with a
    count above zero. Every line is flushed as it is printed.
    '''
    print('File size: {:d}'.format(total_file_size), flush=True)
    for status_code in sorted(status_codes_stats):
        num = status_codes_stats[status_code]
        if num > 0:
            # `{}` rather than `{:s}` so that integer keys format too;
            # `{:s}` raises ValueError for anything that is not a str.
            print('{}: {:d}'.format(status_code, num), flush=True)
|
||
|
||
_, _, _, status_code_str, file_size_str = parts | ||
def update_metrics(line, total_file_size, status_codes_stats):
    '''Updates the metrics from a given HTTP request log.

    Args:
        line (str): The line of input from which to retrieve the metrics.
        total_file_size (int): The running total file size before this
            line is taken into account.
        status_codes_stats (dict): Per-status-code counters. The counter
            for this line's status code is incremented in place when the
            code is already a key of the dict.

    Returns:
        int: The new total file size.
    '''
    line_info = extract_input(line)
    status_code = line_info.get('status_code', '0')
    # Only status codes already present as keys are counted; any other
    # value leaves the counters untouched.
    if status_code in status_codes_stats:
        status_codes_stats[status_code] += 1
    return total_file_size + line_info['file_size']
|
||
if idx % 10 == 0: | ||
print_stats(total_file_size, status_codes) | ||
|
||
except KeyboardInterrupt: | ||
print_stats(total_file_size, status_codes) | ||
def run():
    '''Starts the log parser.

    Reads lines from standard input until end of input or a keyboard
    interrupt, feeding each one to ``update_metrics``. The accumulated
    statistics are printed after every 10 lines read and once more when
    reading stops.
    '''
    line_num = 0
    total_file_size = 0
    status_codes_stats = {
        '200': 0,
        '301': 0,
        '400': 0,
        '401': 0,
        '403': 0,
        '404': 0,
        '405': 0,
        '500': 0,
    }
    try:
        while True:
            # input() raises EOFError at end of input, which ends the loop.
            line = input()
            total_file_size = update_metrics(
                line,
                total_file_size,
                status_codes_stats,
            )
            # Every line read counts towards the 10-line reporting interval.
            line_num += 1
            if line_num % 10 == 0:
                print_statistics(total_file_size, status_codes_stats)
    except (KeyboardInterrupt, EOFError):
        # Print the final totals on Ctrl-C or end of input.
        print_statistics(total_file_size, status_codes_stats)
|
||
|
||
if __name__ == "__main__": | ||
main() | ||
if __name__ == '__main__': | ||
run() |