diff --git a/README.md b/README.md
index 38fc88b..748ccaf 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ tracestorm --model "Qwen/Qwen2.5-1.5B-Instruct" --pattern azure_code
 #### Example Command for Loading Prompts from Datasets
 
 ```bash
-tracestorm --model "Qwen/Qwen2.5-1.5B-Instruct" --duration 30 --datasets-config-file ./examples/datasets_config_hf.json
+tracestorm --model "Qwen/Qwen2.5-1.5B-Instruct" --duration 30 --datasets-config ./examples/datasets_config_hf.json
 ```
 
@@ -60,7 +60,7 @@ tracestorm --model "Qwen/Qwen2.5-1.5B-Instruct" --duration 30 --datasets-config-
    - Refer to `./examples/datasets_config_local.json` for an example configuration.
    - If you want to test loading from local files, please run `./examples/save_test_datasets.py` first to download and save two datasets.
-2. Remote datasets from Hugging Face 
+2. Remote datasets from Hugging Face
    - Refer to `./examples/datasets_config_hf.json` for an example configuration.
 
 **Sorting Strategy**: Defines how prompts from multiple datasets are ordered
@@ -85,6 +85,6 @@ Please check `./examples/datasets_config_default.json` for required fields in `d
 - `--base-url`: Optional. OpenAI Base URL (default is `http://localhost:8000/v1`).
 - `--api-key`: Optional. OpenAI API Key (default is `none`).
 - `--seed`: Optional. Random seed for trace pattern reproducibility (default is `none`).
-- `--datasets-config-file`: Optional. Configuration file for loading prompt messages from provided datasets. Uses `DEFAULT_MESSAGES` is not specified.
+- `--datasets-config`: Optional. Configuration file for loading prompt messages from provided datasets. Uses `DEFAULT_MESSAGES` if not specified.
 
 Make sure to adjust the parameters according to your testing needs!
diff --git a/examples/datasets_config_default.json b/examples/datasets_config_default.json
new file mode 100644
index 0000000..0746f64
--- /dev/null
+++ b/examples/datasets_config_default.json
@@ -0,0 +1,14 @@
+{
+    "sort_strategy": "random",
+    "dataset_1": {
+        "file_name": "",
+        "prompt_field": "",
+        "select_ratio": 1,
+        "split": "train"
+    },
+    "dataset_2": {
+        "file_name": "",
+        "prompt_field": "",
+        "select_ratio": 1
+    }
+}
\ No newline at end of file
diff --git a/examples/datasets_config_hf.json b/examples/datasets_config_hf.json
new file mode 100644
index 0000000..9539d50
--- /dev/null
+++ b/examples/datasets_config_hf.json
@@ -0,0 +1,15 @@
+{
+    "sort_strategy": "original",
+    "dataset_1": {
+        "file_name": "hf://datasets/fka/awesome-chatgpt-prompts/prompts.csv",
+        "prompt_field": "prompt",
+        "select_ratio": 2,
+        "split": "train"
+    },
+    "dataset_2": {
+        "file_name": "MAsad789565/Coding_GPT4_Data",
+        "prompt_field": "user",
+        "select_ratio": 8,
+        "split": "train"
+    }
+}
\ No newline at end of file
diff --git a/examples/datasets_config_local.json b/examples/datasets_config_local.json
new file mode 100644
index 0000000..0ca6d96
--- /dev/null
+++ b/examples/datasets_config_local.json
@@ -0,0 +1,13 @@
+{
+    "sort_strategy": "random",
+    "dataset_1": {
+        "file_name": "Conversational_dataset.jsonl",
+        "prompt_field": "messages",
+        "select_ratio": 6
+    },
+    "dataset_2": {
+        "file_name": "~/.cache/tracestorm/GPT4_coding_sample.csv",
+        "prompt_field": "user",
+        "select_ratio": 4
+    }
+}
\ No newline at end of file
diff --git a/examples/save_test_datasets.py b/examples/save_test_datasets.py
new file mode 100644
index 0000000..2aee421
--- /dev/null
+++ b/examples/save_test_datasets.py
@@ -0,0 +1,28 @@
+import os
+
+import pandas as pd
+
+from tracestorm.constants import DEFAULT_DATASET_FOLDER
+
+
+def prepare_test_datasets():
+    df1 = pd.read_json(
+        "hf://datasets/MAsad789565/Coding_GPT4_Data/Data/GPT_4_Coding.json"
+    )
+    df2 = pd.read_json(
+        "hf://datasets/olathepavilion/Conversational-datasets-json/Validation.jsonl",
+        lines=True,
+    )
+
+    # save the pre-processed datasets to the default folder for testing
+    os.makedirs(DEFAULT_DATASET_FOLDER, exist_ok=True)
+    path1 = os.path.join(DEFAULT_DATASET_FOLDER, "GPT4_coding_sample.csv")
+    path2 = os.path.join(DEFAULT_DATASET_FOLDER, "Conversational_dataset.jsonl")
+
+    # test with different file formats
+    df1.to_csv(path1, index=False)
+    df2.to_json(path2, orient="records", lines=True)
+
+
+if __name__ == "__main__":
+    prepare_test_datasets()
diff --git a/pyproject.toml b/pyproject.toml
index cbc3b0c..1fc7b00 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,9 +12,11 @@ authors = [
 ]
 
 dependencies = [
+    "datasets>=3.3.2",
     "openai>=1.58.0",
     "numpy>=1.26.4",
     "pandas>=2.2.3",
+    "requests>=2.31.0",
     "seaborn>=0.13.2",
     "matplotlib>=3.9",
     "click>=8.1.8"
@@ -43,4 +45,7 @@ ignore = ["B007"] # Loop control variable not used within loop body
 
 [tool.isort]
 use_parentheses = true
-skip_gitignore = true
\ No newline at end of file
+skip_gitignore = true
+
+[tool.setuptools]
+packages = { find = { exclude = ["examples"] } }
diff --git a/requirements.txt b/requirements.txt
index ba8db04..8f874c0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,8 @@
+click>=8.1.8
+datasets>=3.3.2
 matplotlib>=3.9
 numpy>=1.26.4
+openai>=1.58.0
 pandas>=2.2.3
 requests>=2.31.0
 seaborn>=0.13.2
diff --git a/tests/test_cli.py b/tests/test_cli.py
index e435630..8ed12a2 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -81,7 +81,7 @@ def test_cli_invalid_pattern(self):
         )
 
         self.assertNotEqual(result.exit_code, 0)
-        self.assertIn("Invalid pattern", result.output)
+        self.assertIn("Invalid value for '--pattern'", result.output)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
new file mode 100644
index 0000000..b323ddf
--- /dev/null
+++ b/tests/test_data_loader.py
@@ -0,0 +1,63 @@
+import os
+import unittest
+
+import pandas as pd
+
+from tracestorm.constants import DEFAULT_DATASET_FOLDER
+from tracestorm.data_loader import Dataset, load_datasets
+
+
+class TestDataLoader(unittest.TestCase):
+    def test_remote_files(self):
+        """
+        Test loading datasets from Hugging Face.
+        There are 2 datasets, testing for:
+        1. loading with datasets.load_dataset
+        2. loading CSV format with pandas
+        """
+        datasets, sort = load_datasets("examples/datasets_config_hf.json")
+        assert isinstance(datasets, list)
+        assert isinstance(datasets[0], Dataset) and isinstance(
+            datasets[1], Dataset
+        )
+        assert sort == "original"
+        assert len(datasets) == 2
+        assert datasets[0].select_ratio == 2 and datasets[1].select_ratio == 8
+        assert datasets[0].length > 0 and datasets[1].length > 0
+
+    def test_local_files(self):
+        """Test loading from local files"""
+
+        os.makedirs(DEFAULT_DATASET_FOLDER, exist_ok=True)
+        # testing datasets
+        df1 = pd.read_json(
+            "hf://datasets/MAsad789565/Coding_GPT4_Data/Data/GPT_4_Coding.json"
+        )
+        df2 = pd.read_json(
+            "hf://datasets/olathepavilion/Conversational-datasets-json/Validation.jsonl",
+            lines=True,
+        )
+
+        # test with different file formats
+        path1 = os.path.join(DEFAULT_DATASET_FOLDER, "GPT4_coding_sample.csv")
+        path2 = os.path.join(
+            DEFAULT_DATASET_FOLDER, "Conversational_dataset.jsonl"
+        )
+
+        # save the pre-processed datasets to the default folder for testing
+        df1.to_csv(path1, index=False)
+        df2.to_json(path2, orient="records", lines=True)
+
+        datasets, sort = load_datasets("examples/datasets_config_local.json")
+        assert isinstance(datasets, list)
+        assert isinstance(datasets[0], Dataset) and isinstance(
+            datasets[1], Dataset
+        )
+        assert sort == "random"
+        assert len(datasets) == 2
+        assert datasets[0].select_ratio == 6 and datasets[1].select_ratio == 4
+        assert datasets[0].length > 0 and datasets[1].length > 0
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tracestorm/cli.py b/tracestorm/cli.py
index 331b5a1..2647858 100644
--- a/tracestorm/cli.py
+++ b/tracestorm/cli.py
@@ -1,9 +1,10 @@
 import os
-from typing import Tuple
+from typing import Optional, Tuple
 
 import click
 
 from tracestorm.core import run_load_test
+from tracestorm.data_loader import load_datasets
 from tracestorm.logger import init_logger
 from tracestorm.trace_generator import (
     AzureTraceGenerator,
@@ -14,13 +15,13 @@
 logger = init_logger(__name__)
 
 # Valid patterns
-SYNTHETIC_PATTERNS = {"uniform"}
+SYNTHETIC_PATTERNS = {"uniform", "poisson", "random"}
 AZURE_PATTERNS = {"azure_code", "azure_conv"}
 VALID_PATTERNS = SYNTHETIC_PATTERNS | AZURE_PATTERNS
 
 
 def create_trace_generator(
-    pattern: str, rps: int, duration: int
+    pattern: str, rps: int, duration: int, seed: Optional[int] = None
 ) -> Tuple[TraceGenerator, str]:
     """
     Create appropriate trace generator based on pattern and validate parameters.
@@ -29,6 +30,7 @@
     Args:
         pattern: Pattern for trace generation
         rps: Requests per second (only for synthetic patterns)
        duration: Duration in seconds (only for synthetic patterns)
+        seed: Random seed for reproducibility of trace patterns
 
     Returns:
         Tuple of (TraceGenerator instance, Warning message or empty string)
@@ -50,7 +52,9 @@
             raise ValueError(
                 "Duration must be non-negative for synthetic patterns"
             )
-        return SyntheticTraceGenerator(rps, pattern, duration), warning_msg
+        return SyntheticTraceGenerator(
+            rps, pattern, duration, seed
+        ), warning_msg
 
     # Azure patterns
     if rps != 1:
@@ -75,6 +79,7 @@
 @click.option(
     "--pattern",
     default="uniform",
+    type=click.Choice(sorted(VALID_PATTERNS), case_sensitive=False),
     help=f"Pattern for generating trace. Valid patterns: {sorted(VALID_PATTERNS)}",
 )
 @click.option(
@@ -83,6 +88,12 @@
     "--duration",
     type=int,
     default=10,
     help="Duration in seconds (only used with synthetic patterns)",
 )
+@click.option(
+    "--seed",
+    type=int,
+    default=None,
+    help="Random seed for reproducibility of trace patterns",
+)
 @click.option(
     "--subprocesses", type=int, default=1, help="Number of subprocesses"
 )
@@ -98,21 +109,43 @@
     default=lambda: os.environ.get("OPENAI_API_KEY", "none"),
     help="OpenAI API Key",
 )
-def main(model, rps, pattern, duration, subprocesses, base_url, api_key):
+@click.option(
+    "--datasets-config", default=None, help="Config file for datasets"
+)
+def main(
+    model,
+    rps,
+    pattern,
+    duration,
+    seed,
+    subprocesses,
+    base_url,
+    api_key,
+    datasets_config,
+):
     """Run trace-based load testing for OpenAI API endpoints."""
     try:
         trace_generator, warning_msg = create_trace_generator(
-            pattern, rps, duration
+            pattern, rps, duration, seed
        )
         if warning_msg:
             logger.warning(warning_msg)
 
+        if datasets_config is None:
+            datasets = []
+            sort_strategy = None
+        else:
+            datasets, sort_strategy = load_datasets(datasets_config)
+
         _, result_analyzer = run_load_test(
             trace_generator=trace_generator,
             model=model,
             subprocesses=subprocesses,
             base_url=base_url,
             api_key=api_key,
+            datasets=datasets,
+            sort_strategy=sort_strategy,
+            seed=seed,
         )
 
         print(result_analyzer)
diff --git a/tracestorm/constants.py b/tracestorm/constants.py
index 6c97319..1b3d453 100644
--- a/tracestorm/constants.py
+++ b/tracestorm/constants.py
@@ -1,3 +1,5 @@
+import os
+
 AZURE_REPO_URL = "Azure/AzurePublicDataset"
 
 AZURE_DATASET_PATHS = {
@@ -11,3 +13,5 @@
 DEFAULT_SUBPROCESSES = 1
 
 DEFAULT_MESSAGES = "Tell me a story"
+
+DEFAULT_DATASET_FOLDER = os.path.expanduser("~/.cache/tracestorm")
diff --git a/tracestorm/core.py b/tracestorm/core.py
index 5185c9e..81ebef7 100644
--- a/tracestorm/core.py
+++ b/tracestorm/core.py
@@ -1,5 +1,5 @@
 import multiprocessing
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 
 from tracestorm.logger import init_logger
 from tracestorm.request_generator import generate_request
@@ -17,6 +17,9 @@ def run_load_test(
     subprocesses: int,
     base_url: str,
     api_key: str,
+    datasets: List,
+    sort_strategy: Optional[str] = None,
+    seed: Optional[int] = None,
 ) -> Tuple[List[Tuple], ResultAnalyzer]:
     """
     Run load test with given configuration.
@@ -27,6 +30,9 @@
         subprocesses: Number of subprocesses to use
         base_url: Base URL for API calls
         api_key: API key for authentication
+        datasets: List of datasets to generate prompts
+        sort_strategy: Sorting strategy for prompts in datasets.
+        seed: Random seed for sorting.
 
     Returns:
         Tuple of (List of results, ResultAnalyzer instance)
@@ -38,7 +44,13 @@
     if total_requests == 0:
         logger.warning("No requests to process. Trace is empty.")
         return [], ResultAnalyzer()
 
-    requests = generate_request(model, total_requests)
+    requests = generate_request(
+        model_name=model,
+        nums=total_requests,
+        datasets=datasets,
+        sort_strategy=sort_strategy,
+        seed=seed,
+    )
 
     ipc_queue = multiprocessing.Queue()
     processes = []
diff --git a/tracestorm/data_loader.py b/tracestorm/data_loader.py
new file mode 100644
index 0000000..29eb79e
--- /dev/null
+++ b/tracestorm/data_loader.py
@@ -0,0 +1,205 @@
+import json
+import os
+import re
+from dataclasses import dataclass
+from typing import List, Optional, Tuple
+
+import pandas as pd
+from datasets import load_dataset
+
+from tracestorm.constants import DEFAULT_DATASET_FOLDER
+from tracestorm.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+@dataclass
+class Dataset:
+    """
+    Each Dataset object contains the name of the dataset, a list of prompts,
+    the select ratio among all datasets, and the total number of prompts
+    """
+
+    file_name: str
+    prompts: List[str]
+    select_ratio: int
+    length: int
+
+
+def is_file_type(file_name, extensions):
+    return any(
+        re.search(rf"\.{ext}$", file_name, re.IGNORECASE) for ext in extensions
+    )
+
+
+def resolve_file_path(file_name: str) -> str:
+    """
+    Resolve the file path:
+    - If the file exists locally (relative or absolute path), return its absolute path.
+    - If the file exists in DEFAULT_DATASET_FOLDER, return that path.
+    - If the file does not exist in either location, return file_name, assuming it is to be loaded remotely from Hugging Face.
+    """
+    if os.path.exists(file_name):
+        return os.path.abspath(file_name)
+
+    # check if the file exists in DEFAULT_DATASET_FOLDER
+    file_path = os.path.join(DEFAULT_DATASET_FOLDER, file_name)
+    if os.path.exists(file_path):
+        return file_path
+
+    return file_name
+
+
+def normalize_prompts(row) -> List[str]:
+    """
+    Convert one row to a list of prompts based on the format.
+    """
+    prompts = []
+    if isinstance(row, list):  # if the row contains a list of prompts
+        for item in row:
+            if isinstance(item, str):
+                prompts.append(item)
+            elif isinstance(item, dict) and item.get("role") == "user":
+                prompt = next(
+                    (
+                        item.get(k, "")
+                        for k in ["message", "content", "value"]
+                        if item.get(k, "")
+                    ),
+                    "",
+                )
+                prompts.append(prompt)
+            else:  # we cannot handle this type
+                continue
+    elif isinstance(row, str):  # if the row is already a prompt
+        prompts.append(row)
+    elif (
+        isinstance(row, dict) and row.get("role") == "user"
+    ):  # if the row is a template, retrieve the user prompt
+        prompt = next(
+            (
+                row.get(k, "")
+                for k in ["message", "content", "value"]
+                if row.get(k, "")
+            ),
+            "",
+        )
+        prompts.append(prompt)
+    else:
+        logger.error(f"Unrecognized row format: {row}")
+    return [p for p in prompts if p]  # Remove empty prompts
+
+
+def load_datasets(
+    datasets_config_file: Optional[str] = None,
+) -> Tuple[List[Dataset], Optional[str]]:
+    """
+    Load datasets from local files or Hugging Face datasets.
+
+    Args:
+        datasets_config_file: A dataset configuration file containing file paths,
+        prompt fields, selection ratios, and sorting strategies.
+        Custom data loading logic needs to be implemented if no
+        datasets_config_file is provided.
+
+    Returns:
+        (List[Dataset], str): A list of Dataset objects and the sorting strategy.
+    """
+    if datasets_config_file is None:
+        logger.error("Customized data loading logic needs to be implemented!")
+        return [], None
+
+    # Load datasets configuration file
+    try:
+        with open(datasets_config_file, "r") as f:
+            datasets_config = json.load(f)
+    except FileNotFoundError:
+        logger.error(f"Configuration file '{datasets_config_file}' not found")
+        return [], None
+    except Exception as e:
+        logger.error(f"Error reading '{datasets_config_file}': {e}")
+        return [], None
+
+    # Strategy to sort the provided datasets
+    sort_strategy = datasets_config.pop("sort_strategy", "random")
+
+    # List to store each Dataset
+    datasets = []
+
+    for name, config in datasets_config.items():
+        file_name = config.get("file_name")
+        prompt_field = config.get("prompt_field")
+        split = config.get("split", "train")
+
+        try:
+            ratio = int(config.get("select_ratio", 1))
+        except ValueError:
+            logger.error(
+                f"Invalid 'select_ratio' for dataset '{name}', using default 1"
+            )
+            ratio = 1
+
+        if not file_name or not prompt_field:
+            logger.error(
+                f"Missing required 'file_name' or 'prompt_field' for dataset '{name}'"
+            )
+            continue
+
+        prompts = []
+        file_path = resolve_file_path(file_name)
+        check_field = False
+        try:
+            # If the file does not exist locally and is not of csv or json format,
+            # try to load it from Hugging Face using datasets.load_dataset() first
+            if not os.path.exists(file_path) and not is_file_type(
+                file_name, ["csv", "json", "jsonl"]
+            ):
+                data = load_dataset(file_name)[split]
+
+                if prompt_field not in data.column_names:
+                    logger.error(
+                        f"Field '{prompt_field}' not found in '{file_name}'."
+                    )
+                    continue
+
+                check_field = True
+
+            elif is_file_type(
+                file_name, ["csv"]
+            ):  # CSV files, could be either a local or remote file
+                data = pd.read_csv(file_path)
+
+            elif is_file_type(file_name, ["json", "jsonl"]):  # JSON files
+                data = pd.read_json(
+                    file_path, lines=is_file_type(file_name, ["jsonl"])
+                )
+
+            else:
+                logger.error(
+                    f"Unsupported file format for '{file_name}'. Please implement customized loading logic."
+                )
+                continue
+
+        except Exception as e:
+            logger.error(f"Failed to load '{file_name}': {e}")
+            continue
+
+        if not check_field and prompt_field not in set(data.columns):
+            logger.error(f"Field '{prompt_field}' not found in '{file_name}'.")
+            continue
+
+        # load each row
+        for row in data[prompt_field]:
+            prompts.extend(normalize_prompts(row))
+
+        # Add the dataset information (file name, a list of prompts, select ratio among all datasets, total number of prompts)
+        dataset_obj = Dataset(file_name, prompts, ratio, len(prompts))
+        datasets.append(dataset_obj)
+
+        logger.info(
+            f"Loaded {file_name} with {len(prompts)} prompts, selection ratio = {ratio}"
+        )
+
+    return datasets, sort_strategy
diff --git a/tracestorm/request_generator.py b/tracestorm/request_generator.py
index d858410..4ed4f9c 100644
--- a/tracestorm/request_generator.py
+++ b/tracestorm/request_generator.py
@@ -1,18 +1,80 @@
-from typing import Any, Dict, List
+import random
+from typing import Any, Dict, List, Optional
 
 from tracestorm.constants import DEFAULT_MESSAGES
+from tracestorm.data_loader import Dataset
+from tracestorm.logger import init_logger
+
+logger = init_logger(__name__)
 
 
 def generate_request(
-    model_name: str, nums: int, messages: str = DEFAULT_MESSAGES
+    model_name: str,
+    nums: int,
+    messages: str = DEFAULT_MESSAGES,
+    datasets: Optional[List[Dataset]] = None,
+    sort_strategy: str = "random",
+    seed: Optional[int] = None,
 ) -> List[Dict[str, Any]]:
-    requests = []
-    for _ in range(nums):
-        requests.append(
+    # generate default requests when no datasets are provided
+    if not datasets:
+        return [
+            {
+                "model": model_name,
+                "messages": [{"role": "user", "content": messages}],
+                "stream": True,
+            }
+            for _ in range(nums)
+        ]
+    else:  # Add and sort requests from the provided datasets
+        dataset_samples = []
+
+        # Total ratio to calculate the number of requests for each dataset
+        total_ratio = sum(dataset_obj.select_ratio for dataset_obj in datasets)
+
+        for dataset_obj in datasets:
+            num_requests = int(
+                round(nums * dataset_obj.select_ratio / total_ratio)
+            )
+
+            # Not enough prompts available: repeat the list
+            # (concatenate instead of extend to avoid mutating dataset_obj.prompts)
+            available_prompts = dataset_obj.length
+            prompts = dataset_obj.prompts
+            if num_requests > available_prompts:
+                repeat_count = num_requests // available_prompts
+                prompts = prompts + prompts * repeat_count
+
+            assert len(prompts) >= num_requests
+
+            # Store prompts with indexing for round-robin
+            # For example, if the ratio of dataset1 is 5, we append 5 requests for each idx
+            for i, sample in enumerate(prompts[:num_requests]):
+                idx = i // dataset_obj.select_ratio
+                dataset_samples.append((idx, sample))
+
+            logger.info(
+                f"Selected {num_requests} requests from {dataset_obj.file_name}."
+            )
+
+        # 1. Randomly sort the requests
+        if sort_strategy == "random":
+            if seed is not None:
+                random.seed(seed)
+            random.shuffle(dataset_samples)
+        elif sort_strategy == "original":  # 2. original round-robin order
+            dataset_samples.sort(key=lambda x: x[0])
+        else:
+            raise ValueError(f"Unknown sorting strategy: {sort_strategy}")
+
+        # Extract the prompts from the list
+        requests = [
             {
                 "model": model_name,
-                "messages": [{"role": "user", "content": messages}],
+                "messages": [{"role": "user", "content": prompt}],
                 "stream": True,
             }
-        )
+            for _, prompt in dataset_samples
+        ]
+
     return requests
diff --git a/tracestorm/result_analyzer.py b/tracestorm/result_analyzer.py
index 5130fd2..d35aef9 100644
--- a/tracestorm/result_analyzer.py
+++ b/tracestorm/result_analyzer.py
@@ -178,7 +178,7 @@ def plot_cdf(
         plt.figure(figsize=(8, 6))
         sns.ecdfplot(self.ttft, color="blue")
         plt.title("CDF of Time to First Token (TTFT)")
-        plt.xlabel("TTFT (ms)")
+        plt.xlabel("TTFT")
         plt.ylabel("Cumulative Probability")
         plt.tight_layout()
         ttft_file = get_unique_file_path(ttft_file)
@@ -196,7 +196,7 @@
             plt.figure(figsize=(8, 6))
             sns.ecdfplot(tpot_flat, color="green")
             plt.title("CDF of Time per Output Token (TPOT)")
-            plt.xlabel("TPOT (ms)")
+            plt.xlabel("TPOT")
             plt.ylabel("Cumulative Probability")
             plt.tight_layout()
             tpot_file = get_unique_file_path(tpot_file)
diff --git a/tracestorm/trace_generator.py b/tracestorm/trace_generator.py
index db7f1b0..36b98b5 100644
--- a/tracestorm/trace_generator.py
+++ b/tracestorm/trace_generator.py
@@ -3,6 +3,7 @@
 from abc import ABC, abstractmethod
 from typing import List, Optional
 
+import numpy as np
 import pandas as pd
 import requests
 
@@ -32,7 +33,9 @@ def generate(self) -> List[int]:
 class SyntheticTraceGenerator(TraceGenerator):
     """Generate synthetic traces based on patterns."""
 
-    def __init__(self, rps: int, pattern: str, duration: int):
+    def __init__(
+        self, rps: int, pattern: str, duration: int, seed: Optional[int] = None
+    ):
         """
         Initialize synthetic trace generator.
 
@@ -40,6 +43,7 @@
             rps (int): Requests per second. Must be non-negative.
             pattern (str): Distribution pattern ('uniform', 'random', 'poisson', etc.).
             duration (int): Total duration in seconds. Must be non-negative.
+            seed (int): Seed for reproducibility of 'poisson' and 'random' patterns
         """
         if not isinstance(rps, int) or rps < 0:
             raise ValueError("rps must be a non-negative integer")
@@ -49,6 +53,8 @@
         self.rps = rps
         self.pattern = pattern
         self.duration = duration
+        if seed is not None:
+            np.random.seed(seed)
 
     def generate(self) -> List[int]:
         total_requests = self.rps * self.duration
@@ -59,6 +65,7 @@
             return timestamps
 
         if self.pattern == "uniform":
+            # Distribute requests evenly across the duration
             interval = total_duration_ms / total_requests
             current_time = 0.0
             for _ in range(total_requests):
@@ -66,6 +73,20 @@
                 timestamp = min(timestamp, total_duration_ms - 1)
                 timestamps.append(timestamp)
                 current_time += interval
+        elif self.pattern == "poisson":
+            # Exponential distribution for inter-arrival intervals
+            rate_ms = self.rps / 1000
+            intervals = np.random.exponential(1 / rate_ms, total_requests)
+            current_time = 0.0
+            for i in range(total_requests):
+                timestamp = int(round(current_time))
+                timestamps.append(timestamp)
+                current_time += intervals[i]
+        elif self.pattern == "random":
+            # Sorted so the replayed trace stays chronological
+            timestamps = np.sort(
+                np.random.randint(0, total_duration_ms, size=total_requests)
+            ).tolist()
         else:
             raise ValueError(f"Unknown pattern: {self.pattern}")
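The round-robin ordering that `generate_request` applies under `sort_strategy="original"` is easiest to see with toy data: each dataset contributes up to `select_ratio` prompts per round-robin index, and the stable sort on that index interleaves the datasets. A minimal sketch (toy dataset names and prompts are made up; assumes the `tracestorm` package from this diff is importable):

```python
# Toy illustration of sort_strategy="original"; names and prompts are hypothetical.
from tracestorm.data_loader import Dataset
from tracestorm.request_generator import generate_request

code = Dataset("toy_code", [f"code-{i}" for i in range(4)], select_ratio=1, length=4)
chat = Dataset("toy_chat", [f"chat-{i}" for i in range(4)], select_ratio=2, length=4)

# 6 requests split 1:2 -> 2 from toy_code, 4 from toy_chat.
# Round-robin index is i // select_ratio, so the stable sort interleaves:
#   round 0: code-0, chat-0, chat-1
#   round 1: code-1, chat-2, chat-3
requests = generate_request(
    "Qwen/Qwen2.5-1.5B-Instruct",
    6,
    datasets=[code, chat],
    sort_strategy="original",
)
print([r["messages"][0]["content"] for r in requests])
```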
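For an end-to-end run, the new options compose naturally; a sketch combining the `poisson` pattern, `--seed`, and the renamed `--datasets-config` flag (all other flags keep their defaults):

```bash
# Reproducible Poisson-arrival load test fed by Hugging Face prompts.
tracestorm --model "Qwen/Qwen2.5-1.5B-Instruct" \
    --pattern poisson \
    --duration 30 \
    --seed 42 \
    --datasets-config ./examples/datasets_config_hf.json
```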