"""Report the busiest seconds in a set of HL7 v2 message files.

Each file matched by a glob pattern is read, the MSH-7 (date/time of
message) field of its first MSH segment is parsed, and the number of
messages per wall-clock second is tallied. The seconds with the highest
counts are printed.

Usage:
    python messages_per_second.py '/path/to/files/*.hl7' <top_n>
"""

# Provenance (from the git patch header this module was extracted from):
#   commit:  8d90614904b2720fa079e1e39cffd8e1b11d5776
#   author:  Finnegan's Owner <44065187+pacmano1@users.noreply.github.com>
#   date:    Mon, 10 Feb 2025 12:02:01 -0600
#   subject: [PATCH] Create messages_per_second.py

import os
import glob
import re
from collections import Counter
from datetime import datetime
import sys

# HL7 v2 timestamps look like YYYYMMDDHHMMSS[.S[S[S[S]]]][+/-ZZZZ].
# Second precision is required here because messages are bucketed per second;
# the optional fraction and UTC offset are accepted but not used.
_MSH7_PATTERN = re.compile(r"([0-9]{14})(?:\.[0-9]{1,4})?(?:[+-][0-9]{4})?")


def extract_msh_7(hl7_message):
    """Extract MSH-7 (message timestamp) from an HL7 v2 message.

    Blank lines and any mix of CR / LF / CRLF segment terminators are
    tolerated. The field separator is taken from MSH-1 (the 4th character of
    the MSH segment) and the component separator from MSH-2, so messages
    using delimiters other than ``|`` and ``^`` are handled too.

    Args:
        hl7_message: The raw message text.

    Returns:
        A naive ``datetime`` truncated to whole seconds, holding the
        wall-clock time exactly as written in the message (a trailing UTC
        offset is NOT applied). ``None`` when the input is not a string, no
        MSH segment exists, MSH-7 is empty/missing, or the value is not a
        timestamp with at least second precision. A diagnostic is printed in
        each ``None`` case.
    """
    if not isinstance(hl7_message, str):
        # Guard up front: the diagnostics below slice the message, which
        # would itself raise for None or other non-string input.
        print("Warning: No MSH segment found.")
        return None

    # splitlines() already treats \r, \n and \r\n as terminators; dropping
    # empty entries afterwards takes care of blank lines.
    lines = [line.strip() for line in hl7_message.splitlines() if line.strip()]

    # A segment ID is exactly three characters, so the character right after
    # "MSH" is the field separator (MSH-1). It is never alphanumeric.
    msh_line = next(
        (
            line
            for line in lines
            if line.startswith("MSH") and len(line) > 3 and not line[3].isalnum()
        ),
        None,
    )
    if msh_line is None:
        print("Warning: No MSH segment found.")
        return None

    separator = msh_line[3]
    msh_fields = msh_line.split(separator)

    # After splitting, index 0 is "MSH" and index 1 is MSH-2 (encoding
    # characters), so MSH-7 sits at index 6.
    if len(msh_fields) <= 6 or not msh_fields[6].strip():
        print(f"Warning: MSH-7 is empty or missing. \nMSH Line: {msh_line}")
        return None

    # Older HL7 versions allow a second component (degree of precision) after
    # the time itself; keep only the first component.
    encoding_chars = msh_fields[1]
    component_separator = encoding_chars[0] if encoding_chars else "^"
    timestamp = msh_fields[6].strip().split(component_separator)[0].strip()

    match = _MSH7_PATTERN.fullmatch(timestamp)
    if match is None:
        print(
            f"Error parsing MSH-7: unsupported timestamp {timestamp!r} "
            f"(HL7 Raw: {hl7_message[:100]})"
        )
        return None

    try:
        # Fourteen digits can still be an impossible date (e.g. month 13).
        return datetime.strptime(match.group(1), "%Y%m%d%H%M%S")
    except ValueError as e:
        print(f"Error parsing MSH-7: {e} (HL7 Raw: {hl7_message[:100]})")
        return None


def calculate_max_messages_per_second(file_pattern, top_n):
    """Print and return the seconds with the most HL7 messages.

    Every file matching ``file_pattern`` is treated as one message; only the
    first MSH segment of each file is considered.

    Args:
        file_pattern: Glob pattern selecting the HL7 files to read.
        top_n: How many of the busiest seconds to report.

    Returns:
        A list of ``("YYYY-MM-DD HH:MM:SS", count)`` tuples, highest count
        first; an empty list when no file yielded a usable timestamp.
    """
    message_counts = Counter()

    file_list = glob.glob(file_pattern)
    print(f"Processing {len(file_list)} HL7 files...")

    for filepath in file_list:
        try:
            # MSH-7 is plain ASCII digits, so a stray non-UTF-8 byte elsewhere
            # in the message must not cause the whole file to be skipped.
            # "utf-8-sig" additionally drops a leading BOM if there is one.
            with open(filepath, "r", encoding="utf-8-sig", errors="replace") as file:
                hl7_message = file.read()
        except OSError as e:
            print(f"Error reading file {filepath}: {e}")
            continue

        timestamp = extract_msh_7(hl7_message)
        if timestamp is not None:
            message_counts[timestamp.strftime("%Y-%m-%d %H:%M:%S")] += 1

    if not message_counts:
        print("No valid timestamps found. Ensure MSH-7 exists and format is correct.")
        return []

    top_counts = message_counts.most_common(top_n)
    print(f"Top {top_n} timestamps with highest message counts:")
    for timestamp_key, count in top_counts:
        print(f"{timestamp_key}: {count}")
    return top_counts


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 1 on a usage error.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        print("Usage: python script.py '/path/to/files/*.hl7' <top_n>")
        return 1

    file_pattern, raw_top_n = args
    try:
        top_n = int(raw_top_n)
    except ValueError:
        # Non-numeric input is reported through the same range error below
        # instead of surfacing as a traceback.
        top_n = 0

    if top_n <= 0 or top_n > 100:
        print("Error: top_n must be a number between 1 and 100.")
        return 1

    calculate_max_messages_per_second(file_pattern, top_n)
    return 0


if __name__ == "__main__":
    sys.exit(main())