From cd9e8acf639d19e3ded3772c8c7d0b8a85f03ef3 Mon Sep 17 00:00:00 2001 From: Alon Stein Date: Wed, 15 Jan 2025 18:02:36 +0200 Subject: [PATCH 1/3] add run_wrk_benchmark.py, plot_wrk_bm_tests_comparison.py, plot_wrk_bm_histograms.py --- .../wrk_http_bm/plot_wrk_bm_histograms.py | 133 ++++++++ .../plot_wrk_bm_tests_comparison.py | 148 ++++++++ benchmark/wrk_http_bm/readme | 63 ++++ benchmark/wrk_http_bm/run_wrk_benchmark.py | 315 ++++++++++++++++++ 4 files changed, 659 insertions(+) create mode 100644 benchmark/wrk_http_bm/plot_wrk_bm_histograms.py create mode 100644 benchmark/wrk_http_bm/plot_wrk_bm_tests_comparison.py create mode 100644 benchmark/wrk_http_bm/readme create mode 100644 benchmark/wrk_http_bm/run_wrk_benchmark.py diff --git a/benchmark/wrk_http_bm/plot_wrk_bm_histograms.py b/benchmark/wrk_http_bm/plot_wrk_bm_histograms.py new file mode 100644 index 0000000..fb216ec --- /dev/null +++ b/benchmark/wrk_http_bm/plot_wrk_bm_histograms.py @@ -0,0 +1,133 @@ +""" +Script: plot_wrk_bm_histograms.py + +Description: +This script parses `wrk` or `wrk2` output files generated by the 'run_wrk_bm.py' script +and visualizes key metrics in the form of histograms. The histograms focus on latency percentiles +and requests per second (RPS) for individual test runs. + +Key Features: +- Parses raw `wrk` output files to extract latency percentiles (50%, 75%, 90%, 99%) and RPS. +- Visualizes latency percentiles and RPS metrics as histograms for each test run. +- Saves the output as PNG files for easy analysis and reporting. + +Metrics Visualized: +1. Latency Percentiles (50%, 75%, 90%, 99%) +2. Requests Per Second (RPS) + +Usage: +1. Run the 'run_wrk_bm.py' script to generate the `wrk` output files. +2. Use this script to parse the output and generate histograms. 
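+
+The parser expects the standard `wrk --latency` report layout; an illustrative
+excerpt (the numbers are made up) showing the sections the regular expression in
+parse_wrk_output() looks for:
+
+    Running 30s test @ http://127.0.0.1:8080/api
+      4 threads and 64 connections
+      ...
+      Latency Distribution
+         50%   12.34ms
+         75%   20.10ms
+         90%   35.60ms
+         99%    1.20s
+      ...
+    Requests/sec:   1234.56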
+ +Example: + python plot_wrk_bm_histograms.py -i ./wrk_output.txt -o ./plots +""" + +import re +import matplotlib.pyplot as plt +import argparse +import os + +def parse_wrk_output(file_path): + with open(file_path, 'r') as file: + data = file.read() + + # Regex to extract latency distributions and requests/sec + regex = r"Running.*?@\s+(.*?)\n.*?(\d+)\sthreads and (\d+)\sconnections.*?Latency Distribution.*?50%\s+([\d\.]+)(ms|s)\s+75%\s+([\d\.]+)(ms|s)\s+90%\s+([\d\.]+)(ms|s)\s+99%\s+([\d\.]+)(ms|s).*?Requests/sec:\s+([\d\.]+)" + matches = re.findall(regex, data, re.DOTALL) + + results = [] + for match in matches: + url = match[0] + threads = int(match[1]) + connections = int(match[2]) + latencies = { + "50%": float(match[3]) * (1000 if match[4] == "s" else 1), + "75%": float(match[5]) * (1000 if match[6] == "s" else 1), + "90%": float(match[7]) * (1000 if match[8] == "s" else 1), + "99%": float(match[9]) * (1000 if match[10] == "s" else 1), + } + rps = float(match[11]) # Requests per second + results.append({ + "url": url, + "threads": threads, + "connections": connections, + "latencies": latencies, + "requests_per_sec": rps, + }) + + return results + +def plot_histograms(results, output_file, enable_show=False): + colors = ["blue", "orange", "green", "red", "purple", "brown"] # Colors for each run + + # Prepare a figure with two subplots + fig, axes = plt.subplots(2, 1, figsize=(12, 12)) + + # Subplot 1: Latency Percentiles + for idx, result in enumerate(results): + latencies = result["latencies"] + percentiles = list(latencies.keys()) + values = list(latencies.values()) + + axes[0].bar( + [f"{percentile} (Run {idx+1})" for percentile in percentiles], + values, + color=colors[idx % len(colors)], + alpha=0.7, + label=f"{result['url']} (Threads: {result['threads']}, Conns: {result['connections']})", + ) + + axes[0].set_title("Latency Percentiles Across Runs", fontsize=16) + axes[0].set_ylabel("Latency (ms)", fontsize=12) + axes[0].tick_params(axis="x", rotation=45, labelsize=10) + axes[0].legend(loc="upper left", fontsize=10) + axes[0].grid(axis="y", linestyle="--", alpha=0.7) + + # Subplot 2: Requests Per Second (RPS) + for idx, result in enumerate(results): + rps = result["requests_per_sec"] + + axes[1].bar( + [f"Run {idx+1}"], + [rps], + color=colors[idx % len(colors)], + alpha=0.7, + label=f"{result['url']} (Threads: {result['threads']}, Conns: {result['connections']})", + ) + + axes[1].set_title("Requests Per Second (RPS) Across Runs", fontsize=16) + axes[1].set_ylabel("Requests/sec", fontsize=12) + axes[1].tick_params(axis="x", rotation=45, labelsize=10) + axes[1].legend(loc="upper left", fontsize=10) + axes[1].grid(axis="y", linestyle="--", alpha=0.7) + + # Adjust layout and save the figure + plt.tight_layout() + plt.savefig(output_file) + + if enable_show: + plt.show() + + plt.close() + print(f"Saved histogram to {output_file}") + +if __name__ == "__main__": + # Argument parsing + parser = argparse.ArgumentParser(description="Generate histograms for wrk output.") + parser.add_argument("-i", "--input_file", type=str, help="Path to the wrk output file.") + parser.add_argument("-o", "--output_folder", type=str, help="Path to the folder to save the plots.") + args = parser.parse_args() + + # Ensure the output folder exists + os.makedirs(args.output_folder, exist_ok=True) + + # Parse wrk output and generate plots + results = parse_wrk_output(args.input_file) + + # Generate output file name based on input file name + input_file_name = 
os.path.splitext(os.path.basename(args.input_file))[0] + output_file_name = input_file_name.replace("_results", "_histogram") + ".png" + output_file_path = os.path.join(args.output_folder, output_file_name) + + plot_histograms(results, output_file_path) diff --git a/benchmark/wrk_http_bm/plot_wrk_bm_tests_comparison.py b/benchmark/wrk_http_bm/plot_wrk_bm_tests_comparison.py new file mode 100644 index 0000000..5df1669 --- /dev/null +++ b/benchmark/wrk_http_bm/plot_wrk_bm_tests_comparison.py @@ -0,0 +1,148 @@ +""" +Script: plot_wrk_bm_tests_comparison.py + +Description: +This script parses CSV files generated by the 'run_wrk_bm.py' tool to create comparative plots +of latency percentiles and requests per second (RPS) across multiple benchmark test cases. +The script reads the CSV files, processes data grouped by unique combinations of thread/connection counts, +and generates plots for each combination. + +Key Features: +- Parses CSV files to extract latency and RPS data. +- Groups results by thread/connection combinations. +- Generates comparative plots of latency percentiles and RPS across test cases. +- Saves the output as PNG files for easy visualization. + +Usage: +1. Ensure the CSV files (e.g., bm_results_.csv) are generated by the 'run_wrk_bm.py' tool. +2. Run this script, specifying the folder containing the CSV files and an output folder for the plots. + +Example: + python plot_wrk_bm_tests_comparison.py -i ./csv_files -o ./plots +""" +import os +import argparse +import pandas as pd +import matplotlib.pyplot as plt + +def parse_csv_file(file_path): + # Read the CSV file into a pandas DataFrame + df = pd.read_csv(file_path, header=0) + + # Rename columns to ensure consistent format + df.rename(columns={ + "Latency (ms)": "latency", + "Requests/sec": "requests_per_sec", + "Threads": "threads", + "Connections": "connections", + "API": "url" + }, inplace=True) + + # Extract the endpoint postfix from the URL + df["url"] = df["url"].apply(lambda x: "/".join(x.split("/", 3)[-1:])) + return df + +def strip_prefix(url, prefixes): + for prefix in prefixes: + if prefix in url: + return url.split(prefix, 1)[-1] # Remove the prefix and keep everything after it + return url # Return the original URL if no prefix matches + +# generates plots per connection_threads +def plot_per_connection_threads(dataframes, output_folder): + + # Create a combined DataFrame with all results + combined_df = pd.concat(dataframes, keys=[df.name for df in dataframes], names=["test_case"]) + + # Ensure column names are consistent + if "latency" not in combined_df.columns or "requests_per_sec" not in combined_df.columns: + raise ValueError("Required columns (latency, requests_per_sec) are missing in the input data.") + + # Transform `url` column to strip prefixes globally + endpoint_skip = ["ufmRest/"] + combined_df["url"] = combined_df["url"].apply(lambda url: strip_prefix(url, endpoint_skip)) + + # Create unique identifiers for connection_threads + combined_df["connection_threads"] = combined_df.apply(lambda x: f"{x['connections']}_{x['threads']}", axis=1) + + # Get unique connection_threads combinations + unique_combinations = combined_df["connection_threads"].unique() + + # Generate plots for each connection_threads + for combination in unique_combinations: + + subset = combined_df[combined_df["connection_threads"] == combination] + endpoint_skip = ["ufmRest/"] + endpoints = subset["url"].unique() + + fig, axes = plt.subplots(2, 1, figsize=(14, 12)) + + # Subplot 1: Latency Comparison + for test_case, group in 
subset.groupby("test_case"): + avg_latency = group.groupby("url")["latency"].mean() + axes[0].plot( + avg_latency.index, + avg_latency, + marker="o", + label=f"{test_case} (Avg Latency)" + ) + + axes[0].set_title(f"Average Latency Comparison (Connections: {combination})", fontsize=16) + axes[0].set_ylabel("Latency (ms)", fontsize=12) + axes[0].set_xticks(range(len(endpoints))) + axes[0].set_xticklabels(endpoints, rotation=45, ha="right") + axes[0].legend(fontsize=10) + axes[0].grid(True) + + # Subplot 2: Requests Per Second Comparison + for test_case, group in subset.groupby("test_case"): + reqs_per_sec = group.groupby("url")["requests_per_sec"].mean() + axes[1].plot( + reqs_per_sec.index, + reqs_per_sec, + marker="o", + label=f"{test_case} (Requests per Second)" + ) + + axes[1].set_title(f"Requests Per Second Comparison (Connections: {combination})", fontsize=16) + axes[1].set_ylabel("Requests per Second", fontsize=12) + axes[1].set_xlabel("Endpoints", fontsize=12) + axes[1].set_xticks(range(len(endpoints))) + axes[1].set_xticklabels(endpoints, rotation=45, ha="right") + axes[1].legend(fontsize=10) + axes[1].grid(True) + + # Save the plot + output_file = os.path.join(output_folder, f"bm_comparison_tc_{combination}.png") + plt.tight_layout() + plt.savefig(output_file) + plt.close() + print(f"Plot saved for connection_threads {combination}: {output_file}") + +if __name__ == "__main__": + + # parse arguments + parser = argparse.ArgumentParser(description="Compare benchmark results from CSV files and generate plots.") + parser.add_argument("-i", "--input_folder", type=str, required=True, help="Path to the folder containing CSV files.") + parser.add_argument("-o", "--output_folder", type=str, required=True, help="Path to save the combined output PNG file.") + args = parser.parse_args() + + # Read 'CSV files in the input folder ends with '_bm.csv' + input_files = [f for f in os.listdir(args.input_folder) if f.endswith("_bm.csv")] + + if not input_files: + print("No matching files found in the input folder.") + exit(1) + + dataframes = [] + for file_name in input_files: + test_case = os.path.splitext(file_name)[0].replace("_bm", "") + file_path = os.path.join(args.input_folder, file_name) + df = parse_csv_file(file_path) + df.name = test_case # Assign the test case name to the DataFrame + dataframes.append(df) + + os.makedirs(args.output_folder, exist_ok=True) + + # Generate plots per connection_threads + plot_per_connection_threads(dataframes, args.output_folder) diff --git a/benchmark/wrk_http_bm/readme b/benchmark/wrk_http_bm/readme new file mode 100644 index 0000000..bcd5311 --- /dev/null +++ b/benchmark/wrk_http_bm/readme @@ -0,0 +1,63 @@ +# run_wrk_bm.py +run_wrk_bm.py is a Python wrapper for the wrk/wrk2 tools, designed to simplify their usage and provide additional functionality. + +# Features +Simplified Wrk/Wrk2 Integration: Wraps the functionality of wrk and wrk2 for easy execution and configuration. +Test Configuration Management: Parses and manages test settings from a configuration file. +Result Visualization: Supports plotting test results for better insights and analysis. 
+Configuration Example +For an example configuration file, refer to: +UFM/gvvm/authentication_server/benchmark + +# run with -h for args list +python run_wrk_bm.py -h + +# Prerequisites: +pandas, matplotlib (pip install) +wrk, wrk2 (@see Wrk/Wrk2 Installation guide at the end of this document) + +# Wrk/wrk2 Overview +------------------- +wrk and wrk2 are high-performance HTTP benchmarking tools designed to test the throughput and latency of web servers or APIs. + +# wrk +A modern HTTP benchmarking tool that generates significant load. +Features: +Uses multiple threads and connections for concurrency. +Provides latency distribution statistics. +Flexible scripting support in Lua to customize requests. +Typically used for load testing APIs, web servers, or any HTTP endpoint. +In wrk, there is no explicit control over the request rate. + +# wrk2 +An enhanced version of wrk with a focus on constant request rate testing. +Differences from wrk: +Adds rate-limiting (-R flag) to send requests at a constant rate (e.g., 1000 requests/second). +Ideal for latency benchmarking under steady-state conditions. +Also supports Lua scripting and provides latency distribution output. + +It is built to send requests at a constant rate to simulate real-world scenarios where systems operate at a steady load +rather than trying to overwhelm them. + +Using wrk or wrk2 depends on what you're trying to achieve: +Use Case Examples: +wrk: For generating high, unrestricted load to test a server's maximum capacity. +wrk2: For precisely controlled testing to measure latency at a fixed request rate. + +Use wrk for Load Testing: +If your goal is to find the system's limits: +to push the system and identify maximum throughput and bottlenecks. + +Use wrk2 for Constant Throughput Testing +If your goal is to evaluate system behavior under real-world usage scenarios: +to control the request rate and measure latency and stability under defined conditions. + +# Wrk/Wrk2 Installation: + +git clone https://github.com/wg/wrk.git +cd wrk +make + +git clone https://github.com/giltene/wrk2.git +cd wrk2 +make diff --git a/benchmark/wrk_http_bm/run_wrk_benchmark.py b/benchmark/wrk_http_bm/run_wrk_benchmark.py new file mode 100644 index 0000000..3209ad7 --- /dev/null +++ b/benchmark/wrk_http_bm/run_wrk_benchmark.py @@ -0,0 +1,315 @@ +""" +Script: run_wrk_benchmark.py + +Description: +This script wraps the `wrk` and `wrk2` tools, providing a streamlined interface for running +benchmark tests defined in a YAML configuration file. It automates the execution of benchmarks +with user-defined test cases and parameters, such as URLs, headers, threads, and connections. +The script outputs both the raw `wrk`/`wrk2` results and processed CSV files for further analysis. + +Key Features: +- Supports both `wrk` (load testing) and `wrk2` (rate-controlled testing). +- Parses a YAML configuration file (`config.yaml`) to define benchmark test cases. +- Outputs `wrk`/`wrk2` raw results and processed CSV files for each test case. +- Simplifies the execution of benchmarks with configurable parameters. + +Output Files: +1. **Raw Output**: + - `wrk`/`wrk2` raw output files for each test case, named `_bm_results.txt`. +2. **CSV Files**: + - Processed results as CSV files, named `_bm.csv`. + +Usage: +1. Define your benchmark test cases in `config.yaml`. +2. Run this script to execute the benchmarks and generate outputs. 
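+
+Note: the script locates the benchmarking binary through the WRK_PATH (for wrk) or
+WRK2_PATH (for wrk2) environment variable and exits with an error if it is not set,
+for example:
+
+    export WRK_PATH=/usr/local/bin/wrk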
+ +Example: + python run_wrk_benchmark.py --config_file ./config.yaml --wrk wrk2 --out_path ./results +""" + +import os +import subprocess +import re +import yaml +import sys +import ast +import base64 +import argparse +# pylint: disable=import-error +import pandas as pd +import matplotlib.pyplot as plt +from urllib.parse import urlparse +from pathlib import Path + +def parse_latency(output): + """ + Parses the output of a latency measurement and extracts the average latency value. + Args: + output (str): The output string containing the latency measurement. + Returns: + float or None: The average latency value if found, None otherwise. + """ + # Regex to match Latency Avg value in ms or s + match = re.search(r"Latency\s+(\d+\.\d+)(ms|s)", output) + if match: + avg_latency = float(match.group(1)) + unit = match.group(2) + if unit == "s": + avg_latency *= 1000 # Convert seconds to milliseconds + return avg_latency + else: + print("Latency not found in output") + return None + +def parse_rps(output): + """ + Parses the output of a benchmark test and extracts the Requests per Second (RPS) value. + Args: + output (str): The output of the benchmark test. + Returns: + float or None: The RPS value if found in the output, None otherwise. + """ + match = re.search(r'Requests/sec:\s+([\d\.]+)', output) + if match: + rps = float(match.group(1)) + return rps + return None + +def run_wrk(wrk_conf, wrk_v, api, threads, connections, wrk_path): + """ + Run the wrk benchmark tool with the specified parameters. + + Args: + wrk_conf the wrk config file + api: the config api (url, header [optional] etc.) + threads (int): The number of threads to use for the benchmark. + connections (int): The number of connections to use for the benchmark. + duration (in) duration to run in seconds + wrk_path (str): The path to the wrk executable. + + Returns: + str: The output of the wrk benchmark tool. + """ + url = api["url"] + headers = [] + + duration = wrk_conf.get("duration", "3s") # Default duration is 3 seconds + + rate = 0 + if wrk_v == "wrk2": + rate = int(wrk_conf.get("rate", "10")) # Default rate 10 (wrk2) + + # Add headers from the configuration + if "headers" in api: + for key, value in api["headers"].items(): + # wrk does not handle inline shell commands like $(echo -n 'admin:123456' | base64) such as curl do + # need to call it explicit e.g. for Authorization: Basic + if key == "Authorization": + # Check if it's Basic auth, but only base64-encode the credentials if needed + if "Basic" not in value and ":" in value: + credentials = value.strip() # user:pass + encoded = base64.b64encode(credentials.encode()).decode() + headers.extend(['-H', f'"{key}: Basic {encoded}"']) + else: + # If it's token-based, don't encode it + headers.extend(['-H', f'"{key}: {value}"']) + else: + headers.extend(['-H', f'"{key}: {value}"']) + cmd = [ + wrk_path, + f"-t{threads}", + f"-c{connections}", + f"-d{duration}", + "--latency", + url, + ] + headers + + # the -R option is applicable only for wrk2 + if wrk_v == "wrk2": + cmd += [f"-R{rate}"] + + # todo : for lua script appenf "-s" lua_script + + command = ' '.join(cmd) + print("Running command: ", command) + + result = subprocess.run(command, shell=True, capture_output=True, text=True, check=False) + if result.returncode != 0: + print("Benchmarking failed. 
Error message:", result.stderr) + return None + + return result.stdout + +def run_benchmark(bm_set, wrk_v, enable_plotting, benchmark_name, threads_connections, out_path, wrk_path="/usr/local/bin/wrk"): + """ + Run benchmark tests for multiple APIs using wrk. + + Args: + bm_set the benchmark test params set (dictionary) + wrk_v wrk version, wrk2 will look for 'rate' attribute in the config + enable_plotting determines whether generate plotting + benchmark_name (str): The name of the benchmark. + threads_connections: + list of tuple of threads&connections test case. + For example [(1, 1), (2, 2), (4, 4), (8, 8), (16, 16)] + out_path path to store the output files + wrk_path (str, optional): The path to the wrk executable. Defaults to "/usr/local/bin/wrk". + + Returns: + None + """ + + apis = bm_set.get("benchmark_apis", []) + + results = pd.DataFrame(columns=["API", "Threads", "Connections", "Latency (ms)", "Requests/sec"]) + outputs = "" + + for api in apis: + for t, c in threads_connections: + output = run_wrk(bm_set, wrk_v, api, t, c, wrk_path) + + if output == None: + print("Failed to run_wrk") + sys.exit(1) + + outputs += output + "\n" + + try: + # Parse the output to extract Latency and RPS, then append to the DataFrame + # This parsing depends on the specific format of wrk's output + latency = float(parse_latency(output)) + rps = float(parse_rps(output)) + except ValueError: + # Handle invalid data gracefully, e.g., skip this row + print(f"Invalid data encountered for API {api['url']} with Threads {t} and Connections {c}") + continue # Skip this row + + new_row = pd.DataFrame([{ + "API": api["url"], + "Threads": t, + "Connections": c, + "Latency (ms)": latency, + "Requests/sec": rps + }]) + results = pd.concat([results, new_row], ignore_index=True) + + if not os.path.exists(out_path): + os.makedirs(out_path) + + out_csv = os.path.join(out_path, f"{benchmark_name}_bm.csv") + out_test_file = os.path.join(out_path, f"{benchmark_name}_bm_results.txt") + + results.to_csv(out_csv, index=False) + + with open(out_test_file, "w") as file: # 'w' mode overwrites the file if it exists + file.write(outputs) + + if enable_plotting: + gen_plotting(benchmark_name, apis, results, out_path) + + print(f"Benchmarking completed. Output files stored to '{out_path}'") + +# Plotting +def gen_plotting(benchmark_name, apis, results, out_path): + for idx, api in enumerate(apis): + api_results = results[results["API"] == api["url"]] + plt.figure(figsize=(10, 5)) + plt.subplot(1, 2, 1) + plt.plot(api_results["Threads"], api_results["Latency (ms)"], label="Latency") + plt.title("Latency") + plt.xlabel("Threads") + plt.ylabel("Latency (ms)") + plt.legend() + + plt.subplot(1, 2, 2) + plt.plot(api_results["Threads"], api_results["Requests/sec"], label="Requests/sec") + plt.title("Requests/sec") + plt.xlabel("Threads") + plt.ylabel("Requests/sec") + plt.legend() + + plt.tight_layout() + plt.savefig(f"{out_path}/benchmark_{api['url'].split('/')[-1]}_{benchmark_name}_{idx}.png") + plt.close() + +def parse_arguments(): + """ + Parse command line arguments. + + Returns: + argparse.Namespace: Parsed command line arguments. 
+ """ + parser = argparse.ArgumentParser() + + parser.add_argument('-t', "--test_name", type=str, help="The name of the benchmark test to run, as specified in the configuration file", required=True) + parser.add_argument('-w', "--wrk", type=str, help="Specify the tool to use: 'wrk' for load testing or 'wrk2' for controlled rate testing (default: 'wrk')", default="wrk") + parser.add_argument('-c', "--config_file", type=str, default="./config.yaml", help="Path to the configuration file (default: './config.yaml')", required=True) + parser.add_argument('-o', "--out_path", type=str, default="/tmp/benchmark/", help="Path to the output folder for saving result files (default: '/tmp/benchmark/')") + parser.add_argument('-p', '--plotting', action='store_true', help='Enable plotting') + ret_args = parser.parse_args() + return ret_args + +def load_config(config_file): + try: + with open(config_file, 'r') as file: + return yaml.safe_load(file) + except FileNotFoundError: + print(f"Error: Configuration file '{config_file}' not found.") + sys.exit(1) + except yaml.YAMLError as e: + print(f"Error parsing YAML file: {e}") + sys.exit(1) + +def get_wrk_exe(wrk_type): + # Reading an environment variable + wrk_path = os.getenv("WRK2_PATH") if wrk_type == "wrk2" else os.getenv("WRK_PATH") + if not wrk_path: + print("Error: WRK_PATH|WRK2_PATH is not set.") + sys.exit(1) + + # Create a Path object for the wrk_path + wrk_executable = Path(wrk_path) + + # Check if the path exists and is a file + if not wrk_executable.is_file(): + print(f"Error: The path {wrk_path} does not point to a valid file.") + sys.exit(1) + + # Check if the file is executable + if not os.access(wrk_executable, os.X_OK): + print(f"Error: The file {wrk_path} is not executable.") + sys.exit(1) + + return wrk_path + +if __name__ == "__main__": + + args = parse_arguments() + + wrk_exe = get_wrk_exe(args.wrk) + if wrk_exe is None: + print("Error: WRK_PATH or WRK2_PATH environment variable is not set or invalid. Please export the correct path to the wrk or wrk2 executable.") + sys.exit(1) + + bm_name = args.test_name + out_path = args.out_path + + # Load configuration (todo - allow override name in args) + wrk_config = load_config(args.config_file) + wrk_bm = wrk_config.get("wrk_bm", {}) + + if bm_name not in wrk_bm: + print(f"Error: Test case '{bm_name}' not found in configuration file.") + print(f"Available bm : {', '.join(wrk_bm.keys())}") + sys.exit(1) + + # Extract the threads_connections for the selected test case + raw_threads_connections = wrk_bm[bm_name].get("threads_connections", []) + # Convert the raw YAML tuples (stored as strings) back to Python tuples + threads_connections = [ast.literal_eval(item) for item in raw_threads_connections] + + out_path = wrk_config.get("wrk_output_path", {}) + # append to out path wrk/wrk2 + out_path = os.path.join(out_path, args.wrk) + + run_benchmark(wrk_bm[bm_name], args.wrk, args.plotting, bm_name, threads_connections, out_path=out_path, wrk_path=wrk_exe) From dfe73bdb188184abfaff45ddb2691f45ed3c10fa Mon Sep 17 00:00:00 2001 From: Alon Stein Date: Sun, 19 Jan 2025 20:04:38 +0200 Subject: [PATCH 2/3] 1. update config - allow set 'rate' per api (endpoint) 2. 
add to histogram bins value --- .../wrk_http_bm/plot_wrk_bm_histograms.py | 28 +++++++++++++++++-- benchmark/wrk_http_bm/run_wrk_benchmark.py | 3 +- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/benchmark/wrk_http_bm/plot_wrk_bm_histograms.py b/benchmark/wrk_http_bm/plot_wrk_bm_histograms.py index fb216ec..64c36fa 100644 --- a/benchmark/wrk_http_bm/plot_wrk_bm_histograms.py +++ b/benchmark/wrk_http_bm/plot_wrk_bm_histograms.py @@ -70,7 +70,7 @@ def plot_histograms(results, output_file, enable_show=False): percentiles = list(latencies.keys()) values = list(latencies.values()) - axes[0].bar( + bars = axes[0].bar( [f"{percentile} (Run {idx+1})" for percentile in percentiles], values, color=colors[idx % len(colors)], @@ -78,6 +78,18 @@ def plot_histograms(results, output_file, enable_show=False): label=f"{result['url']} (Threads: {result['threads']}, Conns: {result['connections']})", ) + # add text labels on the bars + for bar in bars: + height = bar.get_height() + axes[0].text( + bar.get_x() + bar.get_width() / 2, + height, + f'{height:.2f}', + ha='center', + va='bottom', + fontsize=10 + ) + axes[0].set_title("Latency Percentiles Across Runs", fontsize=16) axes[0].set_ylabel("Latency (ms)", fontsize=12) axes[0].tick_params(axis="x", rotation=45, labelsize=10) @@ -88,7 +100,7 @@ def plot_histograms(results, output_file, enable_show=False): for idx, result in enumerate(results): rps = result["requests_per_sec"] - axes[1].bar( + bars = axes[1].bar( [f"Run {idx+1}"], [rps], color=colors[idx % len(colors)], @@ -96,6 +108,18 @@ def plot_histograms(results, output_file, enable_show=False): label=f"{result['url']} (Threads: {result['threads']}, Conns: {result['connections']})", ) + # Add text labels on the bars + for bar in bars: + height = bar.get_height() + axes[1].text( + bar.get_x() + bar.get_width() / 2, + height, + f'{height:.2f}', + ha='center', + va='bottom', + fontsize=10 + ) + axes[1].set_title("Requests Per Second (RPS) Across Runs", fontsize=16) axes[1].set_ylabel("Requests/sec", fontsize=12) axes[1].tick_params(axis="x", rotation=45, labelsize=10) diff --git a/benchmark/wrk_http_bm/run_wrk_benchmark.py b/benchmark/wrk_http_bm/run_wrk_benchmark.py index 3209ad7..f062e85 100644 --- a/benchmark/wrk_http_bm/run_wrk_benchmark.py +++ b/benchmark/wrk_http_bm/run_wrk_benchmark.py @@ -97,7 +97,8 @@ def run_wrk(wrk_conf, wrk_v, api, threads, connections, wrk_path): rate = 0 if wrk_v == "wrk2": - rate = int(wrk_conf.get("rate", "10")) # Default rate 10 (wrk2) + default_rate = wrk_conf.get("rate", "10") # read default test rate + rate = int(api.get("rate", default_rate)) # read rate set in api url # Add headers from the configuration if "headers" in api: From a669ff347583f40111e88bf2f4c6ffc33d3d70d9 Mon Sep 17 00:00:00 2001 From: Alon Stein Date: Mon, 3 Feb 2025 12:12:48 +0200 Subject: [PATCH 3/3] add timeout to wrk config param + fix plot bug --- .../plot_wrk_bm_tests_comparison.py | 21 ++++++++++++------- benchmark/wrk_http_bm/run_wrk_benchmark.py | 15 ++++++++++--- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/benchmark/wrk_http_bm/plot_wrk_bm_tests_comparison.py b/benchmark/wrk_http_bm/plot_wrk_bm_tests_comparison.py index 5df1669..c1f5b73 100644 --- a/benchmark/wrk_http_bm/plot_wrk_bm_tests_comparison.py +++ b/benchmark/wrk_http_bm/plot_wrk_bm_tests_comparison.py @@ -79,7 +79,9 @@ def plot_per_connection_threads(dataframes, output_folder): # Subplot 1: Latency Comparison for test_case, group in subset.groupby("test_case"): - avg_latency = 
group.groupby("url")["latency"].mean() + group["url"] = pd.Categorical(group["url"], categories=endpoints, ordered=True) + avg_latency = group.groupby("url", observed=False)["latency"].mean().reindex(endpoints) + axes[0].plot( avg_latency.index, avg_latency, @@ -89,14 +91,17 @@ def plot_per_connection_threads(dataframes, output_folder): axes[0].set_title(f"Average Latency Comparison (Connections: {combination})", fontsize=16) axes[0].set_ylabel("Latency (ms)", fontsize=12) - axes[0].set_xticks(range(len(endpoints))) - axes[0].set_xticklabels(endpoints, rotation=45, ha="right") + axes[0].set_xticks(range(len(avg_latency.index))) + axes[0].set_xticklabels(avg_latency.index, rotation=45, ha="right") + axes[0].legend(fontsize=10) axes[0].grid(True) # Subplot 2: Requests Per Second Comparison for test_case, group in subset.groupby("test_case"): - reqs_per_sec = group.groupby("url")["requests_per_sec"].mean() + group["url"] = pd.Categorical(group["url"], categories=endpoints, ordered=True) + reqs_per_sec = group.groupby("url", observed=False)["requests_per_sec"].mean().reindex(endpoints) + axes[1].plot( reqs_per_sec.index, reqs_per_sec, @@ -107,8 +112,9 @@ def plot_per_connection_threads(dataframes, output_folder): axes[1].set_title(f"Requests Per Second Comparison (Connections: {combination})", fontsize=16) axes[1].set_ylabel("Requests per Second", fontsize=12) axes[1].set_xlabel("Endpoints", fontsize=12) - axes[1].set_xticks(range(len(endpoints))) - axes[1].set_xticklabels(endpoints, rotation=45, ha="right") + axes[1].set_xticks(range(len(reqs_per_sec.index))) + axes[1].set_xticklabels(reqs_per_sec.index, rotation=45, ha="right") + axes[1].legend(fontsize=10) axes[1].grid(True) @@ -117,7 +123,8 @@ def plot_per_connection_threads(dataframes, output_folder): plt.tight_layout() plt.savefig(output_file) plt.close() - print(f"Plot saved for connection_threads {combination}: {output_file}") + + print(f"Plot (v1.0) saved for connection_threads {combination}: {output_file}") if __name__ == "__main__": diff --git a/benchmark/wrk_http_bm/run_wrk_benchmark.py b/benchmark/wrk_http_bm/run_wrk_benchmark.py index f062e85..7dbfe90 100644 --- a/benchmark/wrk_http_bm/run_wrk_benchmark.py +++ b/benchmark/wrk_http_bm/run_wrk_benchmark.py @@ -50,7 +50,7 @@ def parse_latency(output): float or None: The average latency value if found, None otherwise. 
""" # Regex to match Latency Avg value in ms or s - match = re.search(r"Latency\s+(\d+\.\d+)(ms|s)", output) + match = re.search(r"Latency\s+(\d+(?:\.\d+)?)(us|ms|s)", output) if match: avg_latency = float(match.group(1)) unit = match.group(2) @@ -94,11 +94,17 @@ def run_wrk(wrk_conf, wrk_v, api, threads, connections, wrk_path): headers = [] duration = wrk_conf.get("duration", "3s") # Default duration is 3 seconds + timeout = wrk_conf.get("timeout", "2s") # Default timeout is 2 seconds rate = 0 if wrk_v == "wrk2": - default_rate = wrk_conf.get("rate", "10") # read default test rate - rate = int(api.get("rate", default_rate)) # read rate set in api url + rate = int(wrk_conf.get("rate", "10")) # Default rate 10 (wrk2) + + if "timeout" in api: + timeout = api["timeout"] + + if "rate" in api: + rate = api["rate"] # Add headers from the configuration if "headers" in api: @@ -122,6 +128,8 @@ def run_wrk(wrk_conf, wrk_v, api, threads, connections, wrk_path): f"-c{connections}", f"-d{duration}", "--latency", + "--timeout", + f"{timeout}", url, ] + headers @@ -176,6 +184,7 @@ def run_benchmark(bm_set, wrk_v, enable_plotting, benchmark_name, threads_connec outputs += output + "\n" try: + # Parse the output to extract Latency and RPS, then append to the DataFrame # This parsing depends on the specific format of wrk's output latency = float(parse_latency(output))