diff --git a/README.md b/README.md
index 38fc88b..748ccaf 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ tracestorm --model "Qwen/Qwen2.5-1.5B-Instruct" --pattern azure_code
 #### Example Command for Loading Prompts from Datasets
 
 ```bash
-tracestorm --model "Qwen/Qwen2.5-1.5B-Instruct" --duration 30 --datasets-config-file ./examples/datasets_config_hf.json
+tracestorm --model "Qwen/Qwen2.5-1.5B-Instruct" --duration 30 --datasets-config ./examples/datasets_config_hf.json
 ```
 
@@ -60,7 +60,7 @@ tracestorm --model "Qwen/Qwen2.5-1.5B-Instruct" --duration 30 --datasets-config-
    - Refer to `./examples/datasets_config_local.json` for an example configuration.
    - If you want to test loading from local files, please run `./examples/save_test_datasets.py` first to download and save two datasets.
-2. Remote datasets from Hugging Face 
+2. Remote datasets from Hugging Face
    - Refer to `./examples/datasets_config_hf.json` for an example configuration.
 
 **Sorting Strategy**: Defines how prompts from multiple datasets are ordered
@@ -85,6 +85,6 @@ Please check `./examples/datasets_config_default.json` for required fields in `d
 - `--base-url`: Optional. OpenAI Base URL (default is `http://localhost:8000/v1`).
 - `--api-key`: Optional. OpenAI API Key (default is `none`).
 - `--seed`: Optional. Random seed for trace pattern reproducibility (default is `none`).
-- `--datasets-config-file`: Optional. Configuration file for loading prompt messages from provided datasets. Uses `DEFAULT_MESSAGES` is not specified.
+- `--datasets-config`: Optional. Configuration file for loading prompt messages from provided datasets. Uses `DEFAULT_MESSAGES` if not specified.
 
 Make sure to adjust the parameters according to your testing needs!
diff --git a/examples/datasets_config_default.json b/examples/datasets_config_default.json
new file mode 100644
index 0000000..0746f64
--- /dev/null
+++ b/examples/datasets_config_default.json
@@ -0,0 +1,14 @@
+{
+    "sort_strategy": "random",
+    "dataset_1": {
+        "file_name": "",
+        "prompt_field": "",
+        "select_ratio": 1,
+        "split": "train"
+    },
+    "dataset_2": {
+        "file_name": "",
+        "prompt_field": "",
+        "select_ratio": 1
+    }
+}
\ No newline at end of file
diff --git a/examples/datasets_config_hf.json b/examples/datasets_config_hf.json
new file mode 100644
index 0000000..9539d50
--- /dev/null
+++ b/examples/datasets_config_hf.json
@@ -0,0 +1,15 @@
+{
+    "sort_strategy": "original",
+    "dataset_1": {
+        "file_name": "hf://datasets/fka/awesome-chatgpt-prompts/prompts.csv",
+        "prompt_field": "prompt",
+        "select_ratio": 2,
+        "split": "train"
+    },
+    "dataset_2": {
+        "file_name": "MAsad789565/Coding_GPT4_Data",
+        "prompt_field": "user",
+        "select_ratio": 8,
+        "split": "train"
+    }
+}
\ No newline at end of file
diff --git a/examples/datasets_config_local.json b/examples/datasets_config_local.json
new file mode 100644
index 0000000..0ca6d96
--- /dev/null
+++ b/examples/datasets_config_local.json
@@ -0,0 +1,13 @@
+{
+    "sort_strategy": "random",
+    "dataset_1": {
+        "file_name": "Conversational_dataset.jsonl",
+        "prompt_field": "messages",
+        "select_ratio": 6
+    },
+    "dataset_2": {
+        "file_name": "~/.cache/tracestorm/GPT4_coding_sample.csv",
+        "prompt_field": "user",
+        "select_ratio": 4
+    }
+}
\ No newline at end of file
diff --git a/examples/save_test_datasets.py b/examples/save_test_datasets.py
new file mode 100644
index 0000000..2aee421
--- /dev/null
+++ b/examples/save_test_datasets.py
@@ -0,0 +1,28 @@
+import os
+
+import pandas as pd
+
+from tracestorm.constants import DEFAULT_DATASET_FOLDER
+
+
+def prepare_test_datasets():
+    df1 = pd.read_json(
+        "hf://datasets/MAsad789565/Coding_GPT4_Data/Data/GPT_4_Coding.json"
+    )
+    df2 = pd.read_json(
+        "hf://datasets/olathepavilion/Conversational-datasets-json/Validation.jsonl",
+        lines=True,
+    )
+
+    # save the pre-processed datasets to the default folder for testing
+    os.makedirs(DEFAULT_DATASET_FOLDER, exist_ok=True)
+    path1 = os.path.join(DEFAULT_DATASET_FOLDER, "GPT4_coding_sample.csv")
+    path2 = os.path.join(DEFAULT_DATASET_FOLDER, "Conversational_dataset.jsonl")
+
+    # test with different file formats
+    df1.to_csv(path1, index=False)
+    df2.to_json(path2, orient="records", lines=True)
+
+
+if __name__ == "__main__":
+    prepare_test_datasets()
diff --git a/pyproject.toml b/pyproject.toml
index cbc3b0c..1fc7b00 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,9 +12,11 @@ authors = [
 ]
 
 dependencies = [
+    "datasets>=3.3.2",
     "openai>=1.58.0",
     "numpy>=1.26.4",
     "pandas>=2.2.3",
+    "requests>=2.31.0",
     "seaborn>=0.13.2",
     "matplotlib>=3.9",
     "click>=8.1.8"
@@ -43,4 +45,7 @@ ignore = ["B007"] # Loop control variable not used within loop body
 
 [tool.isort]
 use_parentheses = true
-skip_gitignore = true
\ No newline at end of file
+skip_gitignore = true
+
+[tool.setuptools]
+packages = { find = { exclude = ["examples"] } }
diff --git a/requirements.txt b/requirements.txt
index ba8db04..8f874c0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,8 @@
+click>=8.1.8
+datasets>=3.3.2
 matplotlib>=3.9
 numpy>=1.26.4
+openai>=1.58.0
 pandas>=2.2.3
 requests>=2.31.0
 seaborn>=0.13.2
diff --git a/tests/test_cli.py b/tests/test_cli.py
index e435630..8ed12a2 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -81,7 +81,7 @@ def test_cli_invalid_pattern(self):
         )
 
         self.assertNotEqual(result.exit_code, 0)
-        self.assertIn("Invalid pattern", result.output)
+        self.assertIn("Invalid value for '--pattern'", result.output)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
new file mode 100644
index 0000000..b323ddf
--- /dev/null
+++ b/tests/test_data_loader.py
@@ -0,0 +1,63 @@
+import os
+import unittest
+
+import pandas as pd
+
+from tracestorm.constants import DEFAULT_DATASET_FOLDER
+from tracestorm.data_loader import Dataset, load_datasets
+
+
+class TestDataLoader(unittest.TestCase):
+    def test_remote_files(self):
+        """
+        Test loading datasets from Hugging Face.
+        There are 2 datasets, testing for:
+        1. loading with datasets.load_dataset
+        2. loading CSV format with pandas
+        """
+        datasets, sort = load_datasets("examples/datasets_config_hf.json")
+        assert isinstance(datasets, list)
+        assert isinstance(datasets[0], Dataset) and isinstance(
+            datasets[1], Dataset
+        )
+        assert sort == "original"
+        assert len(datasets) == 2
+        assert datasets[0].select_ratio == 2 and datasets[1].select_ratio == 8
+        assert datasets[0].length > 0 and datasets[1].length > 0
+
+    def test_local_files(self):
+        """Test loading from local files"""
+
+        os.makedirs(DEFAULT_DATASET_FOLDER, exist_ok=True)
+        # testing datasets
+        df1 = pd.read_json(
+            "hf://datasets/MAsad789565/Coding_GPT4_Data/Data/GPT_4_Coding.json"
+        )
+        df2 = pd.read_json(
+            "hf://datasets/olathepavilion/Conversational-datasets-json/Validation.jsonl",
+            lines=True,
+        )
+
+        # test with different file formats
+        path1 = os.path.join(DEFAULT_DATASET_FOLDER, "GPT4_coding_sample.csv")
+        path2 = os.path.join(
+            DEFAULT_DATASET_FOLDER, "Conversational_dataset.jsonl"
+        )
+
+        # save the pre-processed datasets to the default folder for testing
+        df1.to_csv(path1, index=False)
+        df2.to_json(path2, orient="records", lines=True)
+
+        datasets, sort = load_datasets("examples/datasets_config_local.json")
+        assert isinstance(datasets, list)
+        assert isinstance(datasets[0], Dataset) and isinstance(
+            datasets[1], Dataset
+        )
+        assert sort == "random"
+        assert len(datasets) == 2
+        assert datasets[0].select_ratio == 6 and datasets[1].select_ratio == 4
+        assert datasets[0].length > 0 and datasets[1].length > 0
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tracestorm/cli.py b/tracestorm/cli.py
index 331b5a1..2647858 100644
--- a/tracestorm/cli.py
+++ b/tracestorm/cli.py
@@ -1,9 +1,10 @@
 import os
-from typing import Tuple
+from typing import Optional, Tuple
 
 import click
 
 from tracestorm.core import run_load_test
+from tracestorm.data_loader import load_datasets
 from tracestorm.logger import init_logger
 from tracestorm.trace_generator import (
     AzureTraceGenerator,
@@ -14,13 +15,13 @@
 logger = init_logger(__name__)
 
 # Valid patterns
-SYNTHETIC_PATTERNS = {"uniform"}
+SYNTHETIC_PATTERNS = {"uniform", "poisson", "random"}
 AZURE_PATTERNS = {"azure_code", "azure_conv"}
 VALID_PATTERNS = SYNTHETIC_PATTERNS | AZURE_PATTERNS
 
 
 def create_trace_generator(
-    pattern: str, rps: int, duration: int
+    pattern: str, rps: int, duration: int, seed: Optional[int] = None
 ) -> Tuple[TraceGenerator, str]:
     """
     Create appropriate trace generator based on pattern and validate parameters.
@@ -29,6 +30,7 @@
     Args:
         pattern: Pattern for trace generation
         rps: Requests per second (only for synthetic patterns)
        duration: Duration in seconds (only for synthetic patterns)
+        seed: Random seed for reproducibility of trace patterns
 
     Returns:
         Tuple of (TraceGenerator instance, Warning message or empty string)
@@ -50,7 +52,9 @@
             raise ValueError(
                 "Duration must be non-negative for synthetic patterns"
             )
-        return SyntheticTraceGenerator(rps, pattern, duration), warning_msg
+        return SyntheticTraceGenerator(
+            rps, pattern, duration, seed
+        ), warning_msg
 
     # Azure patterns
     if rps != 1:
@@ -75,6 +79,7 @@
 @click.option(
     "--pattern",
     default="uniform",
+    type=click.Choice(sorted(VALID_PATTERNS), case_sensitive=False),
     help=f"Pattern for generating trace. Valid patterns: {sorted(VALID_PATTERNS)}",
 )
 @click.option(
@@ -83,6 +88,12 @@
     "--duration",
     type=int,
     default=10,
     help="Duration in seconds (only used with synthetic patterns)",
 )
+@click.option(
+    "--seed",
+    type=int,
+    default=None,
+    help="Random seed for reproducibility of trace patterns",
+)
 @click.option(
     "--subprocesses", type=int, default=1, help="Number of subprocesses"
 )
@@ -98,21 +109,43 @@
     default=lambda: os.environ.get("OPENAI_API_KEY", "none"),
     help="OpenAI API Key",
 )
-def main(model, rps, pattern, duration, subprocesses, base_url, api_key):
+@click.option(
+    "--datasets-config", default=None, help="Config file for datasets"
+)
+def main(
+    model,
+    rps,
+    pattern,
+    duration,
+    seed,
+    subprocesses,
+    base_url,
+    api_key,
+    datasets_config,
+):
     """Run trace-based load testing for OpenAI API endpoints."""
     try:
         trace_generator, warning_msg = create_trace_generator(
-            pattern, rps, duration
+            pattern, rps, duration, seed
        )
         if warning_msg:
             logger.warning(warning_msg)
 
+        if datasets_config is None:
+            datasets = []
+            sort_strategy = None
+        else:
+            datasets, sort_strategy = load_datasets(datasets_config)
+
         _, result_analyzer = run_load_test(
             trace_generator=trace_generator,
             model=model,
             subprocesses=subprocesses,
             base_url=base_url,
             api_key=api_key,
+            datasets=datasets,
+            sort_strategy=sort_strategy,
+            seed=seed,
         )
 
         print(result_analyzer)
diff --git a/tracestorm/constants.py b/tracestorm/constants.py
index 6c97319..1b3d453 100644
--- a/tracestorm/constants.py
+++ b/tracestorm/constants.py
@@ -1,3 +1,5 @@
+import os
+
 AZURE_REPO_URL = "Azure/AzurePublicDataset"
 
 AZURE_DATASET_PATHS = {
@@ -11,3 +13,5 @@
 DEFAULT_SUBPROCESSES = 1
 
 DEFAULT_MESSAGES = "Tell me a story"
+
+DEFAULT_DATASET_FOLDER = os.path.expanduser("~/.cache/tracestorm")
diff --git a/tracestorm/core.py b/tracestorm/core.py
index 5185c9e..81ebef7 100644
--- a/tracestorm/core.py
+++ b/tracestorm/core.py
@@ -1,5 +1,5 @@
 import multiprocessing
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 
 from tracestorm.logger import init_logger
 from tracestorm.request_generator import generate_request
@@ -17,6 +17,9 @@ def run_load_test(
     subprocesses: int,
     base_url: str,
     api_key: str,
+    datasets: List,
+    sort_strategy: Optional[str] = None,
+    seed: Optional[int] = None,
 ) -> Tuple[List[Tuple], ResultAnalyzer]:
     """
     Run load test with given configuration.
@@ -27,6 +30,9 @@
         subprocesses: Number of subprocesses to use
         base_url: Base URL for API calls
         api_key: API key for authentication
+        datasets: List of datasets to generate prompts
+        sort_strategy: Sorting strategy for prompts in datasets.
+        seed: Random seed for sorting.
 
     Returns:
         Tuple of (List of results, ResultAnalyzer instance)
@@ -38,7 +44,13 @@
     if total_requests == 0:
         logger.warning("No requests to process. Trace is empty.")
         return [], ResultAnalyzer()
 
-    requests = generate_request(model, total_requests)
+    requests = generate_request(
+        model_name=model,
+        nums=total_requests,
+        datasets=datasets,
+        sort_strategy=sort_strategy,
+        seed=seed,
+    )
 
     ipc_queue = multiprocessing.Queue()
     processes = []
diff --git a/tracestorm/data_loader.py b/tracestorm/data_loader.py
new file mode 100644
index 0000000..29eb79e
--- /dev/null
+++ b/tracestorm/data_loader.py
@@ -0,0 +1,205 @@
+import json
+import os
+import re
+from dataclasses import dataclass
+from typing import List, Optional, Tuple
+
+import pandas as pd
+from datasets import load_dataset
+
+from tracestorm.constants import DEFAULT_DATASET_FOLDER
+from tracestorm.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+@dataclass
+class Dataset:
+    """
+    Each Dataset object contains the name of the dataset, a list of prompts,
+    the select ratio among all datasets, and the total number of prompts
+    """
+
+    file_name: str
+    prompts: List[str]
+    select_ratio: int
+    length: int
+
+
+def is_file_type(file_name, extensions):
+    return any(
+        re.search(rf"\.{ext}$", file_name, re.IGNORECASE) for ext in extensions
+    )
+
+
+def resolve_file_path(file_name: str) -> str:
+    """
+    Resolve the file path:
+    - If the file exists locally (relative or absolute path), return its absolute path.
+    - If the file exists in DEFAULT_DATASET_FOLDER, return that path.
+    - If the file does not exist in either location, return file_name, assuming it is to be loaded remotely from Hugging Face.
+    """
+    if os.path.exists(file_name):
+        return os.path.abspath(file_name)
+
+    # check if the file exists in DEFAULT_DATASET_FOLDER
+    file_path = os.path.join(DEFAULT_DATASET_FOLDER, file_name)
+    if os.path.exists(file_path):
+        return file_path
+
+    return file_name
+
+
+def normalize_prompts(row) -> List[str]:
+    """
+    Convert one row to a list of prompts based on the format.
+    """
+    prompts = []
+    if isinstance(row, list):  # if the row contains a list of prompts
+        for item in row:
+            if isinstance(item, str):
+                prompts.append(item)
+            elif isinstance(item, dict) and item.get("role") == "user":
+                prompt = next(
+                    (
+                        item.get(k, "")
+                        for k in ["message", "content", "value"]
+                        if item.get(k, "")
+                    ),
+                    "",
+                )
+                prompts.append(prompt)
+            else:  # we cannot handle this type
+                continue
+    elif isinstance(row, str):  # if the row is already a prompt
+        prompts.append(row)
+    elif (
+        isinstance(row, dict) and row.get("role") == "user"
+    ):  # if the row is a template, retrieve the user prompt
+        prompt = next(
+            (
+                row.get(k, "")
+                for k in ["message", "content", "value"]
+                if row.get(k, "")
+            ),
+            "",
+        )
+        prompts.append(prompt)
+    else:
+        logger.error(f"Unrecognized row format: {row}")
+    return [p for p in prompts if p]  # Remove empty prompts
+
+
+def load_datasets(
+    datasets_config_file: Optional[str] = None,
+) -> Tuple[List[Dataset], Optional[str]]:
+    """
+    Load datasets from local files or Hugging Face datasets.
+
+    Args:
+        datasets_config_file: A dataset configuration file containing file paths,
+        prompt fields, selection ratios, and sorting strategies.
+        Custom data loading logic needs to be implemented if no
+        datasets_config_file is provided.
+
+    Returns:
+        (List[Dataset], str): A list of Dataset objects and the sorting strategy.
+    """
+    if datasets_config_file is None:
+        logger.error("Customized data loading logic needs to be implemented!")
+        return [], None
+
+    # Load datasets configuration file
+    try:
+        with open(datasets_config_file, "r") as f:
+            datasets_config = json.load(f)
+    except FileNotFoundError:
+        logger.error(f"Configuration file '{datasets_config_file}' not found")
+        return [], None
+    except Exception as e:
+        logger.error(f"Error reading '{datasets_config_file}': {e}")
+        return [], None
+
+    # Strategy to sort the provided datasets
+    sort_strategy = datasets_config.pop("sort_strategy", "random")
+
+    # List to store each Dataset
+    datasets = []
+
+    for name, config in datasets_config.items():
+        file_name = config.get("file_name")
+        prompt_field = config.get("prompt_field")
+        split = config.get("split", "train")
+
+        try:
+            ratio = int(config.get("select_ratio", 1))
+        except ValueError:
+            logger.error(
+                f"Invalid 'select_ratio' for dataset '{name}', using default 1"
+            )
+            ratio = 1
+
+        if not file_name or not prompt_field:
+            logger.error(
+                f"Missing required 'file_name' or 'prompt_field' for dataset '{name}'"
+            )
+            continue
+
+        prompts = []
+        file_path = resolve_file_path(file_name)
+        check_field = False
+        try:
+            # If the file does not exist locally and is not of csv or json format,
+            # try to load it from Hugging Face using datasets.load_dataset() first
+            if not os.path.exists(file_path) and not is_file_type(
+                file_name, ["csv", "json", "jsonl"]
+            ):
+                data = load_dataset(file_name)[split]
+
+                if prompt_field not in data.column_names:
+                    logger.error(
+                        f"Field '{prompt_field}' not found in '{file_name}'."
+                    )
+                    continue
+
+                check_field = True
+
+            elif is_file_type(
+                file_name, ["csv"]
+            ):  # CSV files, could be either a local or remote file
+                data = pd.read_csv(file_path)
+
+            elif is_file_type(file_name, ["json", "jsonl"]):  # JSON files
+                data = pd.read_json(
+                    file_path, lines=is_file_type(file_name, ["jsonl"])
+                )
+
+            else:
+                logger.error(
+                    f"Unsupported file format for '{file_name}'. Please implement customized loading logic."
+                )
+                continue
+
+        except Exception as e:
+            logger.error(f"Failed to load '{file_name}': {e}")
+            continue
+
+        if not check_field and prompt_field not in set(data.columns):
+            logger.error(f"Field '{prompt_field}' not found in '{file_name}'.")
+            continue
+
+        # load each row
+        for row in data[prompt_field]:
+            prompts.extend(normalize_prompts(row))
+
+        # Add the dataset information (file name, a list of prompts, select ratio among all datasets, total number of prompts)
+        dataset_obj = Dataset(file_name, prompts, ratio, len(prompts))
+        datasets.append(dataset_obj)
+
+        logger.info(
+            f"Loaded {file_name} with {len(prompts)} prompts, selection ratio = {ratio}"
+        )
+
+    return datasets, sort_strategy
diff --git a/tracestorm/request_generator.py b/tracestorm/request_generator.py
index d858410..4ed4f9c 100644
--- a/tracestorm/request_generator.py
+++ b/tracestorm/request_generator.py
@@ -1,18 +1,80 @@
-from typing import Any, Dict, List
+import random
+from typing import Any, Dict, List, Optional
 
 from tracestorm.constants import DEFAULT_MESSAGES
+from tracestorm.data_loader import Dataset
+from tracestorm.logger import init_logger
+
+logger = init_logger(__name__)
 
 
 def generate_request(
-    model_name: str, nums: int, messages: str = DEFAULT_MESSAGES
+    model_name: str,
+    nums: int,
+    messages: str = DEFAULT_MESSAGES,
+    datasets: Optional[List[Dataset]] = None,
+    sort_strategy: str = "random",
+    seed: Optional[int] = None,
 ) -> List[Dict[str, Any]]:
-    requests = []
-    for _ in range(nums):
-        requests.append(
+    # generate default requests when no datasets are provided
+    if not datasets:
+        return [
+            {
+                "model": model_name,
+                "messages": [{"role": "user", "content": messages}],
+                "stream": True,
+            }
+            for _ in range(nums)
+        ]
+    else:  # Add and sort requests from the provided datasets
+        dataset_samples = []
+
+        # Total ratio to calculate the number of requests for each dataset
+        total_ratio = sum(dataset_obj.select_ratio for dataset_obj in datasets)
+
+        for dataset_obj in datasets:
+            num_requests = int(
+                round(nums * dataset_obj.select_ratio / total_ratio)
+            )
+
+            # Not enough prompts available: repeat the list
+            # (concatenate instead of extend to avoid mutating dataset_obj.prompts)
+            available_prompts = dataset_obj.length
+            prompts = dataset_obj.prompts
+            if num_requests > available_prompts:
+                repeat_count = num_requests // available_prompts
+                prompts = prompts + prompts * repeat_count
+
+            assert len(prompts) >= num_requests
+
+            # Store prompts with indexing for round-robin
+            # For example, if the ratio of dataset1 is 5, we append 5 requests for each idx
+            for i, sample in enumerate(prompts[:num_requests]):
+                idx = i // dataset_obj.select_ratio
+                dataset_samples.append((idx, sample))
+
+            logger.info(
+                f"Selected {num_requests} requests from {dataset_obj.file_name}."
+            )
+
+        # 1. Randomly sort the requests
+        if sort_strategy == "random":
+            if seed is not None:
+                random.seed(seed)
+            random.shuffle(dataset_samples)
+        elif sort_strategy == "original":  # 2. original round-robin order
+            dataset_samples.sort(key=lambda x: x[0])
+        else:
+            raise ValueError(f"Unknown sorting strategy: {sort_strategy}")
+
+        # Extract the prompts from the list
+        requests = [
             {
                 "model": model_name,
-                "messages": [{"role": "user", "content": messages}],
+                "messages": [{"role": "user", "content": prompt}],
                 "stream": True,
             }
-        )
+            for _, prompt in dataset_samples
+        ]
+
     return requests
diff --git a/tracestorm/result_analyzer.py b/tracestorm/result_analyzer.py
index 5130fd2..d35aef9 100644
--- a/tracestorm/result_analyzer.py
+++ b/tracestorm/result_analyzer.py
@@ -178,7 +178,7 @@ def plot_cdf(
         plt.figure(figsize=(8, 6))
         sns.ecdfplot(self.ttft, color="blue")
         plt.title("CDF of Time to First Token (TTFT)")
-        plt.xlabel("TTFT (ms)")
+        plt.xlabel("TTFT")
         plt.ylabel("Cumulative Probability")
         plt.tight_layout()
         ttft_file = get_unique_file_path(ttft_file)
@@ -196,7 +196,7 @@
             plt.figure(figsize=(8, 6))
             sns.ecdfplot(tpot_flat, color="green")
             plt.title("CDF of Time per Output Token (TPOT)")
-            plt.xlabel("TPOT (ms)")
+            plt.xlabel("TPOT")
             plt.ylabel("Cumulative Probability")
             plt.tight_layout()
             tpot_file = get_unique_file_path(tpot_file)
diff --git a/tracestorm/trace_generator.py b/tracestorm/trace_generator.py
index db7f1b0..36b98b5 100644
--- a/tracestorm/trace_generator.py
+++ b/tracestorm/trace_generator.py
@@ -3,6 +3,7 @@
 from abc import ABC, abstractmethod
 from typing import List, Optional
 
+import numpy as np
 import pandas as pd
 import requests
 
@@ -32,7 +33,9 @@ def generate(self) -> List[int]:
 class SyntheticTraceGenerator(TraceGenerator):
     """Generate synthetic traces based on patterns."""
 
-    def __init__(self, rps: int, pattern: str, duration: int):
+    def __init__(
+        self, rps: int, pattern: str, duration: int, seed: Optional[int] = None
+    ):
         """
         Initialize synthetic trace generator.
 
@@ -40,6 +43,7 @@
             rps (int): Requests per second. Must be non-negative.
             pattern (str): Distribution pattern ('uniform', 'random', 'poisson', etc.).
             duration (int): Total duration in seconds. Must be non-negative.
+            seed (int): Seed for reproducibility of 'poisson' and 'random' patterns
         """
         if not isinstance(rps, int) or rps < 0:
             raise ValueError("rps must be a non-negative integer")
@@ -49,6 +53,8 @@
         self.rps = rps
         self.pattern = pattern
         self.duration = duration
+        if seed is not None:
+            np.random.seed(seed)
 
     def generate(self) -> List[int]:
         total_requests = self.rps * self.duration
@@ -59,6 +65,7 @@
             return timestamps
 
         if self.pattern == "uniform":
+            # Distribute requests evenly across the duration
             interval = total_duration_ms / total_requests
             current_time = 0.0
             for _ in range(total_requests):
@@ -66,6 +73,20 @@
                 timestamp = min(timestamp, total_duration_ms - 1)
                 timestamps.append(timestamp)
                 current_time += interval
+        elif self.pattern == "poisson":
+            # Exponential distribution for inter-arrival intervals
+            rate_ms = self.rps / 1000
+            intervals = np.random.exponential(1 / rate_ms, total_requests)
+            current_time = 0.0
+            for i in range(total_requests):
+                timestamp = int(round(current_time))
+                timestamps.append(timestamp)
+                current_time += intervals[i]
+        elif self.pattern == "random":
+            # Sorted so the replayed trace stays chronological
+            timestamps = np.sort(
+                np.random.randint(0, total_duration_ms, size=total_requests)
+            ).tolist()
         else:
             raise ValueError(f"Unknown pattern: {self.pattern}")
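The round-robin ordering that `generate_request` applies under `sort_strategy="original"` is easiest to see with toy data: each dataset contributes up to `select_ratio` prompts per round-robin index, and the stable sort on that index interleaves the datasets. A minimal sketch (toy dataset names and prompts are made up; assumes the `tracestorm` package from this diff is importable):

```python
# Toy illustration of sort_strategy="original"; names and prompts are hypothetical.
from tracestorm.data_loader import Dataset
from tracestorm.request_generator import generate_request

code = Dataset("toy_code", [f"code-{i}" for i in range(4)], select_ratio=1, length=4)
chat = Dataset("toy_chat", [f"chat-{i}" for i in range(4)], select_ratio=2, length=4)

# 6 requests split 1:2 -> 2 from toy_code, 4 from toy_chat.
# Round-robin index is i // select_ratio, so the stable sort interleaves:
#   round 0: code-0, chat-0, chat-1
#   round 1: code-1, chat-2, chat-3
requests = generate_request(
    "Qwen/Qwen2.5-1.5B-Instruct",
    6,
    datasets=[code, chat],
    sort_strategy="original",
)
print([r["messages"][0]["content"] for r in requests])
```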
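For an end-to-end run, the new options compose naturally; a sketch combining the `poisson` pattern, `--seed`, and the renamed `--datasets-config` flag (all other flags keep their defaults):

```bash
# Reproducible Poisson-arrival load test fed by Hugging Face prompts.
tracestorm --model "Qwen/Qwen2.5-1.5B-Instruct" \
    --pattern poisson \
    --duration 30 \
    --seed 42 \
    --datasets-config ./examples/datasets_config_hf.json
```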