From 803a41adbacfad91d8bf5bf37bdaecde4a268a8e Mon Sep 17 00:00:00 2001 From: Brian Raf <92820864+nv-braf@users.noreply.github.com> Date: Thu, 7 Nov 2024 08:28:44 -0800 Subject: [PATCH] Turn statistics into GAP Records (#166) * Changing record names to match GAP and adding some missing type checking * Fixing other unit tests * Updating time to first token records * Updating inter token latency records * Updating output token throughput record * Adding output token throughput per request records * Adding output sequence length (OSL) records * Adding Input sequence length (ISL) records * Removing non-GAP records * Adding telemetry records * Fixing unit testing * Adding request goodput record * Adding method to create records from statistics * Added very basic unit testing * Remove demo file (accidental commit) * Fix codeql error --- .../measurements/model_config_measurement.py | 3 +- genai-perf/genai_perf/metrics/statistics.py | 27 +++++ genai-perf/genai_perf/record/record.py | 20 +--- .../record/types/cpu_available_ram.py | 91 ---------------- .../genai_perf/record/types/cpu_used_ram.py | 91 ---------------- .../record/types/gpu_energy_consumption.py | 56 ++++++++++ .../record/types/gpu_free_memory.py | 94 ---------------- .../record/types/gpu_memory_used.py | 45 ++++++++ .../record/types/gpu_power_limit.py | 52 +++++++++ .../record/types/gpu_power_usage.py | 63 +---------- .../record/types/gpu_total_memory.py | 65 ++--------- .../record/types/gpu_used_memory.py | 94 ---------------- .../record/types/gpu_utilization.py | 64 +---------- .../record/types/input_sequence_length.py | 103 ------------------ .../record/types/input_sequence_length_avg.py | 33 ++++++ .../types/input_sequence_length_base.py | 52 +++++++++ .../record/types/input_sequence_length_max.py | 33 ++++++ .../record/types/input_sequence_length_min.py | 33 ++++++ .../record/types/input_sequence_length_p25.py | 33 ++++++ .../record/types/input_sequence_length_p50.py | 33 ++++++ 
.../record/types/input_sequence_length_p75.py | 33 ++++++ .../record/types/input_sequence_length_p90.py | 33 ++++++ .../record/types/input_sequence_length_p95.py | 33 ++++++ .../record/types/input_sequence_length_p99.py | 33 ++++++ .../record/types/input_sequence_length_std.py | 33 ++++++ .../record/types/inter_token_latency_avg.py | 31 +----- .../record/types/inter_token_latency_base.py | 33 ++---- .../record/types/inter_token_latency_max.py | 31 +----- .../record/types/inter_token_latency_min.py | 31 +----- .../record/types/inter_token_latency_p25.py | 31 +----- .../record/types/inter_token_latency_p50.py | 31 +----- .../record/types/inter_token_latency_p75.py | 31 +----- .../record/types/inter_token_latency_p90.py | 31 +----- .../record/types/inter_token_latency_p95.py | 31 +----- .../record/types/inter_token_latency_p99.py | 31 +----- .../record/types/inter_token_latency_std.py | 33 ++++++ .../record/types/output_sequence_length.py | 103 ------------------ .../types/output_sequence_length_avg.py | 33 ++++++ .../types/output_sequence_length_base.py | 52 +++++++++ .../types/output_sequence_length_max.py | 33 ++++++ .../types/output_sequence_length_min.py | 33 ++++++ .../types/output_sequence_length_p25.py | 33 ++++++ .../types/output_sequence_length_p50.py | 33 ++++++ .../types/output_sequence_length_p75.py | 33 ++++++ .../types/output_sequence_length_p90.py | 33 ++++++ .../types/output_sequence_length_p95.py | 33 ++++++ .../types/output_sequence_length_p99.py | 33 ++++++ .../types/output_sequence_length_std.py | 33 ++++++ .../record/types/output_token_throughput.py | 103 ------------------ .../types/output_token_throughput_avg.py | 50 +++++++++ .../output_token_throughput_per_request.py | 103 ------------------ ...output_token_throughput_per_request_avg.py | 35 ++++++ ...utput_token_throughput_per_request_base.py | 56 ++++++++++ ...output_token_throughput_per_request_max.py | 35 ++++++ ...output_token_throughput_per_request_min.py | 35 ++++++ 
...output_token_throughput_per_request_p25.py | 35 ++++++ ...output_token_throughput_per_request_p50.py | 35 ++++++ ...output_token_throughput_per_request_p75.py | 35 ++++++ ...output_token_throughput_per_request_p90.py | 35 ++++++ ...output_token_throughput_per_request_p95.py | 35 ++++++ ...output_token_throughput_per_request_p99.py | 35 ++++++ ...output_token_throughput_per_request_std.py | 35 ++++++ .../record/types/perf_client_response_wait.py | 95 ---------------- .../record/types/perf_client_send_recv.py | 95 ---------------- .../record/types/perf_latency_avg.py | 58 ---------- .../record/types/perf_latency_p90.py | 58 ---------- .../record/types/perf_latency_p95.py | 58 ---------- .../record/types/perf_latency_p99.py | 58 ---------- .../record/types/perf_server_compute_infer.py | 95 ---------------- .../record/types/perf_server_compute_input.py | 95 ---------------- .../types/perf_server_compute_output.py | 95 ---------------- .../record/types/perf_server_queue.py | 95 ---------------- .../record/types/perf_throughput.py | 103 ------------------ .../record/types/request_goodput_avg.py | 49 +++++++++ .../record/types/request_latency_avg.py | 33 ++++++ ...atency_base.py => request_latency_base.py} | 35 ++---- .../record/types/request_latency_max.py | 33 ++++++ .../record/types/request_latency_min.py | 33 ++++++ .../record/types/request_latency_p25.py | 33 ++++++ .../record/types/request_latency_p50.py | 33 ++++++ .../record/types/request_latency_p75.py | 33 ++++++ .../record/types/request_latency_p90.py | 33 ++++++ .../record/types/request_latency_p95.py | 33 ++++++ .../record/types/request_latency_p99.py | 33 ++++++ .../record/types/request_latency_std.py | 33 ++++++ .../record/types/request_throughput_avg.py | 49 +++++++++ .../record/types/time_to_first_token_avg.py | 31 +----- .../record/types/time_to_first_token_base.py | 33 ++---- .../record/types/time_to_first_token_max.py | 31 +----- .../record/types/time_to_first_token_min.py | 31 +----- 
.../record/types/time_to_first_token_p25.py | 31 +----- .../record/types/time_to_first_token_p50.py | 31 +----- .../record/types/time_to_first_token_p75.py | 31 +----- .../record/types/time_to_first_token_p90.py | 31 +----- .../record/types/time_to_first_token_p95.py | 31 +----- .../record/types/time_to_first_token_p99.py | 31 +----- .../record/types/time_to_first_token_std.py | 33 ++++++ genai-perf/genai_perf/types.py | 1 + .../tests/test_llm_profile_data_parser.py | 6 + .../tests/test_model_config_measurement.py | 54 +++++---- genai-perf/tests/test_record.py | 73 +++++++++---- genai-perf/tests/test_results.py | 4 +- .../tests/test_run_config_measurement.py | 56 +++++----- genai-perf/tests/test_utils.py | 20 ++-- 104 files changed, 2128 insertions(+), 2541 deletions(-) delete mode 100644 genai-perf/genai_perf/record/types/cpu_available_ram.py delete mode 100644 genai-perf/genai_perf/record/types/cpu_used_ram.py create mode 100644 genai-perf/genai_perf/record/types/gpu_energy_consumption.py delete mode 100644 genai-perf/genai_perf/record/types/gpu_free_memory.py create mode 100644 genai-perf/genai_perf/record/types/gpu_memory_used.py create mode 100644 genai-perf/genai_perf/record/types/gpu_power_limit.py delete mode 100644 genai-perf/genai_perf/record/types/gpu_used_memory.py delete mode 100644 genai-perf/genai_perf/record/types/input_sequence_length.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_avg.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_base.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_max.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_min.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_p25.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_p50.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_p75.py create mode 100644 
genai-perf/genai_perf/record/types/input_sequence_length_p90.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_p95.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_p99.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_std.py create mode 100644 genai-perf/genai_perf/record/types/inter_token_latency_std.py delete mode 100644 genai-perf/genai_perf/record/types/output_sequence_length.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_avg.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_base.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_max.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_min.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_p25.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_p50.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_p75.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_p90.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_p95.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_p99.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_std.py delete mode 100644 genai-perf/genai_perf/record/types/output_token_throughput.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_avg.py delete mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_avg.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_base.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_max.py create mode 100644 
genai-perf/genai_perf/record/types/output_token_throughput_per_request_min.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_p25.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_p50.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_p75.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_p90.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_p95.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_p99.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_std.py delete mode 100644 genai-perf/genai_perf/record/types/perf_client_response_wait.py delete mode 100644 genai-perf/genai_perf/record/types/perf_client_send_recv.py delete mode 100644 genai-perf/genai_perf/record/types/perf_latency_avg.py delete mode 100644 genai-perf/genai_perf/record/types/perf_latency_p90.py delete mode 100644 genai-perf/genai_perf/record/types/perf_latency_p95.py delete mode 100644 genai-perf/genai_perf/record/types/perf_latency_p99.py delete mode 100644 genai-perf/genai_perf/record/types/perf_server_compute_infer.py delete mode 100644 genai-perf/genai_perf/record/types/perf_server_compute_input.py delete mode 100644 genai-perf/genai_perf/record/types/perf_server_compute_output.py delete mode 100644 genai-perf/genai_perf/record/types/perf_server_queue.py delete mode 100644 genai-perf/genai_perf/record/types/perf_throughput.py create mode 100644 genai-perf/genai_perf/record/types/request_goodput_avg.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_avg.py rename genai-perf/genai_perf/record/types/{perf_latency_base.py => request_latency_base.py} (67%) create mode 100644 genai-perf/genai_perf/record/types/request_latency_max.py create mode 100644 
genai-perf/genai_perf/record/types/request_latency_min.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_p25.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_p50.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_p75.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_p90.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_p95.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_p99.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_std.py create mode 100644 genai-perf/genai_perf/record/types/request_throughput_avg.py create mode 100644 genai-perf/genai_perf/record/types/time_to_first_token_std.py diff --git a/genai-perf/genai_perf/measurements/model_config_measurement.py b/genai-perf/genai_perf/measurements/model_config_measurement.py index aecedf0f..7fb6d75f 100644 --- a/genai-perf/genai_perf/measurements/model_config_measurement.py +++ b/genai-perf/genai_perf/measurements/model_config_measurement.py @@ -20,6 +20,7 @@ from typing import Any, Dict, Optional, TypeAlias from genai_perf.record.record import Record +from genai_perf.record.types.request_throughput_avg import RequestThroughputAvg from genai_perf.types import ( CheckpointObject, MetricObjectives, @@ -39,7 +40,7 @@ ########################################################################### @dataclass(frozen=True) class ModelConfigMeasurementDefaults: - METRIC_OBJECTIVE = {"perf_throughput": 1.0} + METRIC_OBJECTIVE = {RequestThroughputAvg.tag: 1.0} SELF_IS_BETTER = 1 OTHER_IS_BETTER = -1 diff --git a/genai-perf/genai_perf/metrics/statistics.py b/genai-perf/genai_perf/metrics/statistics.py index 71f7c3ea..c2e4ffb6 100755 --- a/genai-perf/genai_perf/metrics/statistics.py +++ b/genai-perf/genai_perf/metrics/statistics.py @@ -32,8 +32,10 @@ import numpy as np import pandas as pd +from genai_perf.exceptions import GenAIPerfException from 
genai_perf.metrics.metrics import Metrics from genai_perf.metrics.telemetry_metrics import TelemetryMetrics +from genai_perf.record.record import Record, RecordType class Statistics: @@ -192,3 +194,28 @@ def export_parquet(self, artifact_dir: Path, filename: str) -> None: filepath = artifact_dir / f"{filename}.gzip" df.to_parquet(filepath, compression="gzip") + + def create_records(self) -> List[Record]: + """ + Populates and returns a list of Records + """ + statistic_records = [] + for metric_base_name, metric_info in self.stats_dict.items(): + for metric_post_name, metric_value in metric_info.items(): + if metric_post_name == "unit": + continue + + metric_name = metric_base_name + "_" + metric_post_name + + try: + new_record = RecordType.get_all_record_types()[metric_name]( + metric_value + ) + except KeyError: + raise GenAIPerfException( + f"{metric_name} is not a valid Record tag." + ) + + statistic_records.append(new_record) + + return statistic_records diff --git a/genai-perf/genai_perf/record/record.py b/genai-perf/genai_perf/record/record.py index 122ed4c1..1c2788d7 100644 --- a/genai-perf/genai_perf/record/record.py +++ b/genai-perf/genai_perf/record/record.py @@ -19,6 +19,7 @@ from typing import Dict, Union from genai_perf.exceptions import GenAIPerfException +from genai_perf.types import RecordValue class RecordType(ABCMeta): @@ -90,16 +91,7 @@ class Record(metaclass=RecordType): records """ - def __init__(self, value: Union[float, int], timestamp: int): - """ - Parameters - ---------- - value : float or int - The value of the GPU metric - timestamp : int - The timestamp for the record in nanoseconds - """ - + def __init__(self, value: RecordValue, timestamp: int): assert type(value) is float or type(value) is int assert type(timestamp) is int @@ -124,16 +116,12 @@ def aggregation_function(): def value_function() -> float: """ Returns the average value from a list - - Returns - ------- - Average value of the list """ return mean # type: ignore 
@staticmethod @abstractmethod - def header(aggregation_tag=False): + def header(aggregation_tag=False) -> str: """ Parameters ---------- @@ -170,7 +158,7 @@ def create_class_from_checkpoint(cls, record_dict) -> "Record": setattr(record, key, record_dict[key]) return record - def value(self) -> Union[float, int]: + def value(self) -> RecordValue: """ This method returns the value of recorded metric diff --git a/genai-perf/genai_perf/record/types/cpu_available_ram.py b/genai-perf/genai_perf/record/types/cpu_available_ram.py deleted file mode 100644 index b4e069ac..00000000 --- a/genai-perf/genai_perf/record/types/cpu_available_ram.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from functools import total_ordering - -from genai_perf.record.record import IncreasingRecord - - -@total_ordering -class CPUAvailableRAM(IncreasingRecord): - """ - The Available CPU memory - """ - - tag = "cpu_available_ram" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - CPU free memory - timestamp : int - The timestamp for the record in nanoseconds - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return ("Max " if aggregation_tag else "") + "RAM Available (MB)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() < other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return CPUAvailableRAM(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - - return CPUAvailableRAM(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/cpu_used_ram.py b/genai-perf/genai_perf/record/types/cpu_used_ram.py deleted file mode 100644 index 7ee7f1d4..00000000 --- a/genai-perf/genai_perf/record/types/cpu_used_ram.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class CPUUsedRAM(DecreasingRecord): - """ - The CPU memory usage record - """ - - tag = "cpu_used_ram" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - CPU used memory - timestamp : int - The timestamp for the record in nanoseconds - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return ("Max " if aggregation_tag else "") + "RAM Usage (MB)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is better than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return CPUUsedRAM(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. 
- """ - - return CPUUsedRAM(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_energy_consumption.py b/genai-perf/genai_perf/record/types/gpu_energy_consumption.py new file mode 100644 index 00000000..15db4fb2 --- /dev/null +++ b/genai-perf/genai_perf/record/types/gpu_energy_consumption.py @@ -0,0 +1,56 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.gpu_record import DecreasingGPURecord + + +@total_ordering +class GPUEnergyConsumption(DecreasingGPURecord): + """ + GPU's energy consumption metric + """ + + tag = "energy_consumption" + + def __init__(self, value, device_uuid=None, timestamp=0): + super().__init__(value, device_uuid, timestamp) + + @staticmethod + def aggregation_function(): + def average(seq): + return sum(seq[1:], start=seq[0]) / len(seq) + + return average + + @staticmethod + def header(aggregation_tag=False): + return ("Average " if aggregation_tag else "") + "GPU Energy Consumption (MJ)" + + def __eq__(self, other: "GPUEnegryConsumption") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "GPUEnergyConsumption") -> bool: + return other.value() < self.value() + + def __add__(self, other: "GPUEnergyConsumption") -> "GPUEnergyConsumption": + return GPUEnergyConsumption( + device_uuid=None, value=(self.value() + other.value()) + ) 
+ + def __sub__(self, other: "GPUEnergyConsumption") -> "GPUEnergyConsumption": + return GPUEnergyConsumption( + device_uuid=None, value=(other.value() - self.value()) + ) diff --git a/genai-perf/genai_perf/record/types/gpu_free_memory.py b/genai-perf/genai_perf/record/types/gpu_free_memory.py deleted file mode 100644 index 21bf3f58..00000000 --- a/genai-perf/genai_perf/record/types/gpu_free_memory.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.gpu_record import IncreasingGPURecord - - -@total_ordering -class GPUFreeMemory(IncreasingGPURecord): - """ - The free memory in the GPU. - """ - - tag = "gpu_free_memory" - - def __init__(self, value, device_uuid=None, timestamp=0): - """ - Parameters - ---------- - value : float - The value of the GPU metrtic - device_uuid : str - The GPU device uuid this metric is associated - with. - timestamp : int - The timestamp for the record in nanoseconds - """ - - super().__init__(value, device_uuid, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - - return ("Max " if aggregation_tag else "") + "GPU Memory Available (MB)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() < other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return GPUFreeMemory(device_uuid=None, value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - - return GPUFreeMemory(device_uuid=None, value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_memory_used.py b/genai-perf/genai_perf/record/types/gpu_memory_used.py new file mode 100644 index 00000000..6f1ac81e --- /dev/null +++ b/genai-perf/genai_perf/record/types/gpu_memory_used.py @@ -0,0 +1,45 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.gpu_record import IncreasingGPURecord + + +@total_ordering +class GPUMemoryUsed(IncreasingGPURecord): + """ + GPU's used memory metric + """ + + tag = "gpu_memory_used" + + def __init__(self, value, device_uuid=None, timestamp=0): + super().__init__(value, device_uuid, timestamp) + + @staticmethod + def header(aggregation_tag=False) -> str: + return ("Max " if aggregation_tag else "") + "GPU Memory Used (GB)" + + def __eq__(self, other: "GPUMemoryUsed") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "GPUMemoryUsed") -> bool: + return self.value() < other.value() + + def __add__(self, other: "GPUMemoryUsed") -> "GPUMemoryUsed": + return GPUMemoryUsed(device_uuid=None, value=(self.value() + other.value())) + + def __sub__(self, other: "GPUMemoryUsed") -> "GPUMemoryUsed": + return GPUMemoryUsed(device_uuid=None, value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_power_limit.py b/genai-perf/genai_perf/record/types/gpu_power_limit.py new file mode 100644 index 00000000..7e5cce2a --- /dev/null +++ b/genai-perf/genai_perf/record/types/gpu_power_limit.py @@ -0,0 +1,52 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.gpu_record import IncreasingGPURecord + + +@total_ordering +class GPUPowerLimit(IncreasingGPURecord): + """ + GPU's power limit metric + """ + + tag = "gpu_power_limit" + + def __init__(self, value, device_uuid=None, timestamp=0): + super().__init__(value, device_uuid, timestamp) + + @staticmethod + def aggregation_function(): + def average(seq): + return sum(seq[1:], start=seq[0]) / len(seq) + + return average + + @staticmethod + def header(aggregation_tag=False): + return ("Average " if aggregation_tag else "") + "GPU Power Limit (W)" + + def __eq__(self, other: "GPUPowerLimit") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "GPUPowerLimit") -> bool: + return self.value() < other.value() + + def __add__(self, other: "GPUPowerLimit") -> "GPUPowerLimit": + return GPUPowerLimit(device_uuid=None, value=(self.value() + other.value())) + + def __sub__(self, other: "GPUPowerLimit") -> "GPUPowerLimit": + return GPUPowerLimit(device_uuid=None, value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_power_usage.py b/genai-perf/genai_perf/record/types/gpu_power_usage.py index 5f1fc49d..31ec2ac2 100644 --- a/genai-perf/genai_perf/record/types/gpu_power_usage.py +++ b/genai-perf/genai_perf/record/types/gpu_power_usage.py @@ -20,33 +20,16 @@ @total_ordering class GPUPowerUsage(DecreasingGPURecord): """ - GPU Power Usage + GPU's power usage metric """ tag = "gpu_power_usage" def __init__(self, value, device_uuid=None, timestamp=0): - """ - Parameters - ---------- - value : float - The value of the GPU metrtic - device_uuid : str - The GPU device uuid this metric is associated - with. 
- timestamp : int - The timestamp for the record in nanoseconds - """ - super().__init__(value, device_uuid, timestamp) @staticmethod def aggregation_function(): - """ - The function that is used to aggregate - this type of record - """ - def average(seq): return sum(seq[1:], start=seq[0]) / len(seq) @@ -54,52 +37,16 @@ def average(seq): @staticmethod def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed as part of the header - indicating that this record has been aggregated using max, min or - average etc. - - Returns - ------- - str - The full name of the - metric. - """ - return ("Average " if aggregation_tag else "") + "GPU Power Usage (W)" - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - + def __eq__(self, other: "GPUPowerUsage") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - + def __lt__(self, other: "GPUPowerUsage") -> bool: return other.value() < self.value() - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - + def __add__(self, other: "GPUPowerUsage") -> "GPUPowerUsage": return GPUPowerUsage(device_uuid=None, value=(self.value() + other.value())) - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. 
- """ - + def __sub__(self, other: "GPUPowerUsage") -> "GPUPowerUsage": return GPUPowerUsage(device_uuid=None, value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_total_memory.py b/genai-perf/genai_perf/record/types/gpu_total_memory.py index 45fdc300..fd43030b 100644 --- a/genai-perf/genai_perf/record/types/gpu_total_memory.py +++ b/genai-perf/genai_perf/record/types/gpu_total_memory.py @@ -20,75 +20,26 @@ @total_ordering class GPUTotalMemory(IncreasingGPURecord): """ - The total memory in the GPU. + GPU's total memory metric """ - tag = "gpu_total_memory" + tag = "total_gpu_memory" def __init__(self, value, device_uuid=None, timestamp=0): - """ - Parameters - ---------- - value : float - The value of the GPU metrtic - device_uuid : str - The GPU device uuid this metric is associated - with. - timestamp : int - The timestamp for the record in nanoseconds - """ - super().__init__(value, device_uuid, timestamp) @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return ("Max " if aggregation_tag else "") + "GPU Memory Available (MB)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ + def header(aggregation_tag=False) -> str: + return ("Max " if aggregation_tag else "") + "GPU Memory Available (GB)" + def __eq__(self, other: "GPUTotalMemory") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - + def __lt__(self, other: "GPUTotalMemory") -> bool: return self.value() < other.value() - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. 
- """ - + def __add__(self, other: "GPUTotalMemory") -> "GPUTotalMemory": return GPUTotalMemory(device_uuid=None, value=(self.value() + other.value())) - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - + def __sub__(self, other: "GPUTotalMemory") -> "GPUTotalMemory": return GPUTotalMemory(device_uuid=None, value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_used_memory.py b/genai-perf/genai_perf/record/types/gpu_used_memory.py deleted file mode 100644 index c4b1cc66..00000000 --- a/genai-perf/genai_perf/record/types/gpu_used_memory.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.gpu_record import DecreasingGPURecord - - -@total_ordering -class GPUUsedMemory(DecreasingGPURecord): - """ - The used memory in the GPU. - """ - - tag = "gpu_used_memory" - - def __init__(self, value, device_uuid=None, timestamp=0): - """ - Parameters - ---------- - value : float - The value of the GPU metrtic - device_uuid : str - The GPU device uuid this metric is associated - with. 
- timestamp : int - The timestamp for the record in nanoseconds - """ - - super().__init__(value, device_uuid, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return ("Max " if aggregation_tag else "") + "GPU Memory Usage (MB)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return GPUUsedMemory(device_uuid=None, value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - - return GPUUsedMemory(device_uuid=None, value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_utilization.py b/genai-perf/genai_perf/record/types/gpu_utilization.py index 67a71f64..0c6e6f65 100644 --- a/genai-perf/genai_perf/record/types/gpu_utilization.py +++ b/genai-perf/genai_perf/record/types/gpu_utilization.py @@ -20,33 +20,16 @@ @total_ordering class GPUUtilization(IncreasingGPURecord): """ - GPU utilization record + GPU's utilization percentage """ tag = "gpu_utilization" def __init__(self, value, device_uuid=None, timestamp=0): - """ - Parameters - ---------- - value : float - The value of the GPU metrtic - device_uuid : str - The GPU device uuid this metric is associated - with. 
- timestamp : int - The timestamp for the record in nanoseconds - """ - super().__init__(value, device_uuid, timestamp) @staticmethod def aggregation_function(): - """ - The function that is used to aggregate - this type of record - """ - def average(seq): return sum(seq[1:], start=seq[0]) / len(seq) @@ -54,53 +37,16 @@ def average(seq): @staticmethod def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - return ("Average " if aggregation_tag else "") + "GPU Utilization (%)" - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - + def __eq__(self, other: "GPUUtilization") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - + def __lt__(self, other: "GPUUtilization") -> bool: return self.value() < other.value() - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - + def __add__(self, other: "GPUUtilization") -> "GPUUtilization": return GPUUtilization(device_uuid=None, value=(self.value() + other.value())) - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - + def __sub__(self, other: "GPUUtilization") -> "GPUUtilization": return GPUUtilization(device_uuid=None, value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/input_sequence_length.py b/genai-perf/genai_perf/record/types/input_sequence_length.py deleted file mode 100644 index 8d7d4065..00000000 --- a/genai-perf/genai_perf/record/types/input_sequence_length.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import IncreasingRecord - - -@total_ordering -class InputSequenceLength(IncreasingRecord): - """ - A record for perf_analyzer - metric 'Input Sequence Length' - """ - - tag = "input_sequence_length" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - The throughput from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def value_function(): - """ - Returns the total value from a list - - Returns - ------- - Total value of the list - """ - return sum - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Input Sequence Length" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() < other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. 
- """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_avg.py b/genai-perf/genai_perf/record/types/input_sequence_length_avg.py new file mode 100644 index 00000000..2fc71b62 --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_avg.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthAvg(InputSequenceLengthBase): + """ + A record for avg input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_avg" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Avg. 
Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_base.py b/genai-perf/genai_perf/record/types/input_sequence_length_base.py new file mode 100644 index 00000000..df15df5f --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_base.py @@ -0,0 +1,52 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord +from genai_perf.types import RecordValue + + +@total_ordering +class InputSequenceLengthBase(IncreasingRecord): + """ + A base class for the input sequence length (ISL) metric + """ + + base_tag = "input_sequence_length" + + def __init__(self, value: RecordValue, timestamp: int = 0) -> None: + super().__init__(value, timestamp) + + def __eq__(self, other: "InputSequenceLengthBase") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "InputSequenceLengthBase") -> bool: + return self.value() < other.value() + + def __add__(self, other: "InputSequenceLengthBase") -> "InputSequenceLengthBase": + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other: "InputSequenceLengthBase") -> "InputSequenceLengthBase": + """ + Allows subbing two records together + to produce a brand new record. 
+ """ + + return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_max.py b/genai-perf/genai_perf/record/types/input_sequence_length_max.py new file mode 100644 index 00000000..84e3964e --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_max.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthMax(InputSequenceLengthBase): + """ + A record for max input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_max" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Max Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_min.py b/genai-perf/genai_perf/record/types/input_sequence_length_min.py new file mode 100644 index 00000000..a0950d55 --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_min.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthMin(InputSequenceLengthBase): + """ + A record for min input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_min" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Min Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_p25.py b/genai-perf/genai_perf/record/types/input_sequence_length_p25.py new file mode 100644 index 00000000..4637c661 --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_p25.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthP25(InputSequenceLengthBase): + """ + A record for p25 Input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_p25" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p25 Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_p50.py b/genai-perf/genai_perf/record/types/input_sequence_length_p50.py new file mode 100644 index 00000000..a5e305b4 --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_p50.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthP50(InputSequenceLengthBase): + """ + A record for p50 input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_p50" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p50 Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_p75.py b/genai-perf/genai_perf/record/types/input_sequence_length_p75.py new file mode 100644 index 00000000..9b7e7d0c --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_p75.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthP75(InputSequenceLengthBase): + """ + A record for p75 input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_p75" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p75 Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_p90.py b/genai-perf/genai_perf/record/types/input_sequence_length_p90.py new file mode 100644 index 00000000..90701d55 --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_p90.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthP90(InputSequenceLengthBase): + """ + A record for p90 Input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_p90" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p90 Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_p95.py b/genai-perf/genai_perf/record/types/input_sequence_length_p95.py new file mode 100644 index 00000000..1a4a6492 --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_p95.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthP95(InputSequenceLengthBase): + """ + A record for p95 input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_p95" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p95 Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_p99.py b/genai-perf/genai_perf/record/types/input_sequence_length_p99.py new file mode 100644 index 00000000..fbb39b08 --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_p99.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthP99(InputSequenceLengthBase): + """ + A record for p99 input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_p99" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p99 Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_std.py b/genai-perf/genai_perf/record/types/input_sequence_length_std.py new file mode 100644 index 00000000..1d69f839 --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_std.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthStd(InputSequenceLengthBase): + """ + A record for std input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_std" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Std Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_avg.py b/genai-perf/genai_perf/record/types/inter_token_latency_avg.py index d6a6f947..0c72abbf 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_avg.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_avg.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyAvg(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for avg Inter token latency metric """ - tag = "inter_token_latency_avg" + tag = InterTokenLatencyBase.base_tag + "_avg" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric.
- """ - + def header(cls, aggregation_tag=False) -> str: return "Avg Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_base.py b/genai-perf/genai_perf/record/types/inter_token_latency_base.py index f267969c..37aafc98 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_base.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_base.py @@ -15,44 +15,27 @@ from functools import total_ordering from genai_perf.record.record import DecreasingRecord +from genai_perf.types import RecordValue @total_ordering class InterTokenLatencyBase(DecreasingRecord): """ - A record for perf_analyzer Inter token latency metric + A base class record for the inter-token latency metric """ - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ + base_tag = "inter_token_latency" + def __init__(self, value: RecordValue, timestamp: int = 0) -> None: super().__init__(value, timestamp) - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - + def __eq__(self, other: "InterTokenLatencyBase") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - + def __lt__(self, other: "InterTokenLatencyBase") -> bool: return self.value() > other.value() - def __add__(self, other): + def __add__(self, other: "InterTokenLatencyBase") -> "InterTokenLatencyBase": """ Allows adding two records together to produce a brand new record. @@ -60,7 +43,7 @@ def __add__(self, other): return self.__class__(value=(self.value() + other.value())) - def __sub__(self, other): + def __sub__(self, other: "InterTokenLatencyBase") -> "InterTokenLatencyBase": """ Allows subbing two records together to produce a brand new record. 
diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_max.py b/genai-perf/genai_perf/record/types/inter_token_latency_max.py index d50d8cd1..f926ca2c 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_max.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_max.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyMax(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for max Inter token latency metric """ - tag = "inter_token_latency_max" + tag = InterTokenLatencyBase.base_tag + "_max" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "Max Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_min.py b/genai-perf/genai_perf/record/types/inter_token_latency_min.py index 4a848480..6152977a 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_min.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_min.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyMin(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for min Inter token latency metric """ - tag = "inter_token_latency_min" + tag = InterTokenLatencyBase.base_tag + "_min" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "Min Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p25.py b/genai-perf/genai_perf/record/types/inter_token_latency_p25.py index 261caae6..86dec41b 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_p25.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p25.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyP25(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for p25 Inter token latency metric """ - tag = "inter_token_latency_p25" + tag = InterTokenLatencyBase.base_tag + "_p25" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p25 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p50.py b/genai-perf/genai_perf/record/types/inter_token_latency_p50.py index 4f277996..7b047c9a 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_p50.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p50.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyP50(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for p50 Inter token latency metric """ - tag = "inter_token_latency_p50" + tag = InterTokenLatencyBase.base_tag + "_p50" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p50 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p75.py b/genai-perf/genai_perf/record/types/inter_token_latency_p75.py index f95a938a..73ddeea6 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_p75.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p75.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyP75(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for p75 Inter token latency metric """ - tag = "inter_token_latency_p75" + tag = InterTokenLatencyBase.base_tag + "_p75" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p75 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p90.py b/genai-perf/genai_perf/record/types/inter_token_latency_p90.py index 14b9e489..b316f9bd 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_p90.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p90.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyP90(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for p90 Inter token latency metric """ - tag = "inter_token_latency_p90" + tag = InterTokenLatencyBase.base_tag + "_p90" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p90 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p95.py b/genai-perf/genai_perf/record/types/inter_token_latency_p95.py index 685d25b6..ef350d82 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_p95.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p95.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyP95(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for p95 Inter token latency metric """ - tag = "inter_token_latency_p95" + tag = InterTokenLatencyBase.base_tag + "_p95" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p95 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p99.py b/genai-perf/genai_perf/record/types/inter_token_latency_p99.py index 73354707..61952c61 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_p99.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p99.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyP99(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for p99 Inter token latency metric """ - tag = "inter_token_latency_p99" + tag = InterTokenLatencyBase.base_tag + "_p99" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - + def header(cls, aggregation_tag=False) -> str: return "p99 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_std.py b/genai-perf/genai_perf/record/types/inter_token_latency_std.py new file mode 100644 index 00000000..9aa86721 --- /dev/null +++ b/genai-perf/genai_perf/record/types/inter_token_latency_std.py @@ -0,0 +1,33 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.inter_token_latency_base import InterTokenLatencyBase + + +@total_ordering +class InterTokenLatencyStd(InterTokenLatencyBase): + """ + A record for std Inter token latency metric + """ + + tag = InterTokenLatencyBase.base_tag + "_std" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Std Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length.py b/genai-perf/genai_perf/record/types/output_sequence_length.py deleted file mode 100644 index f42f0f18..00000000 --- a/genai-perf/genai_perf/record/types/output_sequence_length.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from functools import total_ordering - -from genai_perf.record.record import IncreasingRecord - - -@total_ordering -class OutputSequenceLength(IncreasingRecord): - """ - A record for perf_analyzer - metric 'Output Sequence Length' - """ - - tag = "output_sequence_length" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - The throughput from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def value_function(): - """ - Returns the total value from a list - - Returns - ------- - Total value of the list - """ - return sum - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Output Sequence Length" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() < other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. 
- """ - - return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_avg.py b/genai-perf/genai_perf/record/types/output_sequence_length_avg.py new file mode 100644 index 00000000..d6613151 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_avg.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthAvg(OutputSequenceLengthBase): + """ + A record for avg output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_avg" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Avg. Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_base.py b/genai-perf/genai_perf/record/types/output_sequence_length_base.py new file mode 100644 index 00000000..5c92427d --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_base.py @@ -0,0 +1,52 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord +from genai_perf.types import RecordValue + + +@total_ordering +class OutputSequenceLengthBase(IncreasingRecord): + """ + A base class for the output sequence length (OSL) metric + """ + + base_tag = "output_sequence_length" + + def __init__(self, value: RecordValue, timestamp: int = 0) -> None: + super().__init__(value, timestamp) + + def __eq__(self, other: "OutputSequenceLengthBase") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "OutputSequenceLengthBase") -> bool: + return self.value() < other.value() + + def __add__(self, other: "OutputSequenceLengthBase") -> "OutputSequenceLengthBase": + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other: "OutputSequenceLengthBase") -> "OutputSequenceLengthBase": + """ + Allows subbing two records together + to produce a brand new record. 
+ """ + + return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_max.py b/genai-perf/genai_perf/record/types/output_sequence_length_max.py new file mode 100644 index 00000000..cafcb7b4 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_max.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthMax(OutputSequenceLengthBase): + """ + A record for max output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_max" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Max Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_min.py b/genai-perf/genai_perf/record/types/output_sequence_length_min.py new file mode 100644 index 00000000..2dbfb94e --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_min.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthMin(OutputSequenceLengthBase): + """ + A record for min output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_min" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Min Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_p25.py b/genai-perf/genai_perf/record/types/output_sequence_length_p25.py new file mode 100644 index 00000000..1a199072 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_p25.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthP25(OutputSequenceLengthBase): + """ + A record for p25 output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_p25" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p25 Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_p50.py b/genai-perf/genai_perf/record/types/output_sequence_length_p50.py new file mode 100644 index 00000000..930152ed --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_p50.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthP50(OutputSequenceLengthBase): + """ + A record for p50 output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_p50" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p50 Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_p75.py b/genai-perf/genai_perf/record/types/output_sequence_length_p75.py new file mode 100644 index 00000000..5758051e --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_p75.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthP75(OutputSequenceLengthBase): + """ + A record for p75 output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_p75" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p75 Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_p90.py b/genai-perf/genai_perf/record/types/output_sequence_length_p90.py new file mode 100644 index 00000000..d2fb719a --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_p90.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthP90(OutputSequenceLengthBase): + """ + A record for p90 output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_p90" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p90 Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_p95.py b/genai-perf/genai_perf/record/types/output_sequence_length_p95.py new file mode 100644 index 00000000..cce6c7a3 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_p95.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthP95(OutputSequenceLengthBase): + """ + A record for p95 output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_p95" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p95 Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_p99.py b/genai-perf/genai_perf/record/types/output_sequence_length_p99.py new file mode 100644 index 00000000..e419fc73 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_p99.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthP99(OutputSequenceLengthBase): + """ + A record for p99 output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_p99" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p99 Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_std.py b/genai-perf/genai_perf/record/types/output_sequence_length_std.py new file mode 100644 index 00000000..0ef8038f --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_std.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthStd(OutputSequenceLengthBase): + """ + A record for std output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_std" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Std Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput.py b/genai-perf/genai_perf/record/types/output_token_throughput.py deleted file mode 100644 index d5635491..00000000 --- a/genai-perf/genai_perf/record/types/output_token_throughput.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from functools import total_ordering - -from genai_perf.record.record import IncreasingRecord - - -@total_ordering -class OutputTokenThroughput(IncreasingRecord): - """ - A record for perf_analyzer - metric 'Output Token Throughput' - """ - - tag = "output_token_throughput" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - The throughput from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def value_function(): - """ - Returns the total value from a list - - Returns - ------- - Total value of the list - """ - return sum - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Output Token Throughput (infer/sec)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() < other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. 
- """ - - return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_avg.py b/genai-perf/genai_perf/record/types/output_token_throughput_avg.py new file mode 100644 index 00000000..c8b83960 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_avg.py @@ -0,0 +1,50 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord +from genai_perf.types import RecordValue + + +@total_ordering +class OutputTokenThroughputAvg(IncreasingRecord): + """ + A record for avg output token throughput metric + """ + + tag = "output_token_throughput_avg" + + def __init__(self, value: RecordValue, timestamp: int = 0) -> None: + super().__init__(value, timestamp) + + @staticmethod + def value_function(): + return sum + + @staticmethod + def header(aggregation_tag=False) -> str: + return "Avg. 
Output Token Throughput (tokens/sec)" + + def __eq__(self, other: "OutputTokenThroughputAvg") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "OutputTokenThroughputAvg") -> bool: + return self.value() < other.value() + + def __add__(self, other: "OutputTokenThroughputAvg") -> "OutputTokenThroughputAvg": + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other: "OutputTokenThroughputAvg") -> "OutputTokenThroughputAvg": + return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request.py deleted file mode 100644 index 04065ef3..00000000 --- a/genai-perf/genai_perf/record/types/output_token_throughput_per_request.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from functools import total_ordering - -from genai_perf.record.record import IncreasingRecord - - -@total_ordering -class OutputTokenThroughputPerRequest(IncreasingRecord): - """ - A record for perf_analyzer - metric 'Output Token Throughput Per Request' - """ - - tag = "output_token_throughput_per_request" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - The throughput from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def value_function(): - """ - Returns the total value from a list - - Returns - ------- - Total value of the list - """ - return sum - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Output Token Throughput Per Request (infer/sec)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() < other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. 
- """ - - return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_avg.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_avg.py new file mode 100644 index 00000000..97dbd020 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_avg.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestAvg(OutputTokenThroughputPerRequestBase): + """ + A record for avg output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_avg" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Avg. 
Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_base.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_base.py new file mode 100644 index 00000000..321aad55 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_base.py @@ -0,0 +1,56 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord +from genai_perf.types import RecordValue + + +@total_ordering +class OutputTokenThroughputPerRequestBase(IncreasingRecord): + """ + A base class for the output token throughput per request metric + """ + + base_tag = "output_token_throughput_per_request" + + def __init__(self, value: RecordValue, timestamp: int = 0) -> None: + super().__init__(value, timestamp) + + def __eq__(self, other: "OutputTokenThroughputPerRequestBase") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "OutputTokenThroughputPerRequestBase") -> bool: + return self.value() < other.value() + + def __add__( + self, other: "OutputTokenThroughputPerRequestBase" + ) -> "OutputTokenThroughputPerRequestBase": + """ + Allows adding two records together + to produce a brand new record. 
+ """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__( + self, other: "OutputTokenThroughputPerRequestBase" + ) -> "OutputTokenThroughputPerRequestBase": + """ + Allows subbing two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_max.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_max.py new file mode 100644 index 00000000..c15e7794 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_max.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestMax(OutputTokenThroughputPerRequestBase): + """ + A record for max output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_max" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Max Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_min.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_min.py new file mode 100644 index 00000000..b84ad598 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_min.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestMin(OutputTokenThroughputPerRequestBase): + """ + A record for min output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_min" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Min Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p25.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p25.py new file mode 100644 index 00000000..dcbd3e79 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p25.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestP25(OutputTokenThroughputPerRequestBase): + """ + A record for p25 output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_p25" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p25 Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p50.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p50.py new file mode 100644 index 00000000..70636297 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p50.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestP50(OutputTokenThroughputPerRequestBase): + """ + A record for p50 output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_p50" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p50 Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p75.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p75.py new file mode 100644 index 00000000..be01cb8a --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p75.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestP75(OutputTokenThroughputPerRequestBase): + """ + A record for p75 output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_p75" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p75 Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p90.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p90.py new file mode 100644 index 00000000..175ce97e --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p90.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestP90(OutputTokenThroughputPerRequestBase): + """ + A record for p90 output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_p90" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p90 Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p95.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p95.py new file mode 100644 index 00000000..5d1f2d83 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p95.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestP95(OutputTokenThroughputPerRequestBase): + """ + A record for p95 output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_p95" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p95 Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p99.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p99.py new file mode 100644 index 00000000..68261ab5 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p99.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestP99(OutputTokenThroughputPerRequestBase): + """ + A record for P99 output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_p99" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p99 Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_std.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_std.py new file mode 100644 index 00000000..22a5dc5c --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_std.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestStd(OutputTokenThroughputPerRequestBase): + """ + A record for std output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_std" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Std Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/perf_client_response_wait.py b/genai-perf/genai_perf/record/types/perf_client_response_wait.py deleted file mode 100644 index 0204634f..00000000 --- a/genai-perf/genai_perf/record/types/perf_client_response_wait.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class PerfClientResponseWait(DecreasingRecord): - """ - A record for perf_analyzer - metric 'Client response wait' - """ - - tag = "perf_client_response_wait" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Response Wait Time (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return PerfClientResponseWait(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. - - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return PerfClientResponseWait(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_client_send_recv.py b/genai-perf/genai_perf/record/types/perf_client_send_recv.py deleted file mode 100644 index e6eca6a2..00000000 --- a/genai-perf/genai_perf/record/types/perf_client_send_recv.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class PerfClientSendRecv(DecreasingRecord): - """ - A record for perf_analyzer - metric 'Client send/recv' - """ - - tag = "perf_client_send_recv" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Client Send/Recv (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return PerfClientSendRecv(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return PerfClientSendRecv(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_latency_avg.py b/genai-perf/genai_perf/record/types/perf_latency_avg.py deleted file mode 100644 index 7b40ce14..00000000 --- a/genai-perf/genai_perf/record/types/perf_latency_avg.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.types.perf_latency_base import PerfLatencyBase - - -@total_ordering -class PerfLatencyAvg(PerfLatencyBase): - """ - A record for perf_analyzer latency metric - """ - - tag = "perf_latency_avg" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - - return "Avg Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/perf_latency_p90.py b/genai-perf/genai_perf/record/types/perf_latency_p90.py deleted file mode 100644 index e05b7632..00000000 --- a/genai-perf/genai_perf/record/types/perf_latency_p90.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.types.perf_latency_base import PerfLatencyBase - - -@total_ordering -class PerfLatencyP90(PerfLatencyBase): - """ - A record for perf_analyzer latency metric - """ - - tag = "perf_latency_p90" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - - return "p90 Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/perf_latency_p95.py b/genai-perf/genai_perf/record/types/perf_latency_p95.py deleted file mode 100644 index 53877cad..00000000 --- a/genai-perf/genai_perf/record/types/perf_latency_p95.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.types.perf_latency_base import PerfLatencyBase - - -@total_ordering -class PerfLatencyP95(PerfLatencyBase): - """ - A record for perf_analyzer latency metric - """ - - tag = "perf_latency_p95" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - - return "p95 Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/perf_latency_p99.py b/genai-perf/genai_perf/record/types/perf_latency_p99.py deleted file mode 100644 index ba568a90..00000000 --- a/genai-perf/genai_perf/record/types/perf_latency_p99.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.types.perf_latency_base import PerfLatencyBase - - -@total_ordering -class PerfLatencyP99(PerfLatencyBase): - """ - A record for perf_analyzer latency metric - """ - - tag = "perf_latency_p99" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - - return "p99 Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/perf_server_compute_infer.py b/genai-perf/genai_perf/record/types/perf_server_compute_infer.py deleted file mode 100644 index 5e65dd9e..00000000 --- a/genai-perf/genai_perf/record/types/perf_server_compute_infer.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class PerfServerComputeInfer(DecreasingRecord): - """ - A record for Server compute infer time - from the perf analyzer - """ - - tag = "perf_server_compute_infer" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - - return "Server Compute Infer time (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return PerfServerComputeInfer(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. - - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return PerfServerComputeInfer(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_server_compute_input.py b/genai-perf/genai_perf/record/types/perf_server_compute_input.py deleted file mode 100644 index 313f7a44..00000000 --- a/genai-perf/genai_perf/record/types/perf_server_compute_input.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class PerfServerComputeInput(DecreasingRecord): - """ - A record for Server compute input time - from the perf analyzer - """ - - tag = "perf_server_compute_input" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Server Compute Input time (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return PerfServerComputeInput(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return PerfServerComputeInput(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_server_compute_output.py b/genai-perf/genai_perf/record/types/perf_server_compute_output.py deleted file mode 100644 index 914e3cf5..00000000 --- a/genai-perf/genai_perf/record/types/perf_server_compute_output.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class PerfServerComputeOutput(DecreasingRecord): - """ - A record for Server compute output time - from the perf analyzer - """ - - tag = "perf_server_compute_output" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - - return "Server Compute Output time (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return PerfServerComputeOutput(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. - - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return PerfServerComputeOutput(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_server_queue.py b/genai-perf/genai_perf/record/types/perf_server_queue.py deleted file mode 100644 index 3f1c7144..00000000 --- a/genai-perf/genai_perf/record/types/perf_server_queue.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class PerfServerQueue(DecreasingRecord): - """ - A record for Server queue time - from the perf analyzer - """ - - tag = "perf_server_queue" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Server Queue time (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return PerfServerQueue(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. - - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return PerfServerQueue(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_throughput.py b/genai-perf/genai_perf/record/types/perf_throughput.py deleted file mode 100644 index 4d52d342..00000000 --- a/genai-perf/genai_perf/record/types/perf_throughput.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import IncreasingRecord - - -@total_ordering -class PerfThroughput(IncreasingRecord): - """ - A record for perf_analyzer - metric 'Throughput' - """ - - tag = "perf_throughput" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - The throughput from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def value_function(): - """ - Returns the total value from a list - - Returns - ------- - Total value of the list - """ - return sum - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Throughput (infer/sec)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() < other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. 
- """ - - return PerfThroughput(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - - return PerfThroughput(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/request_goodput_avg.py b/genai-perf/genai_perf/record/types/request_goodput_avg.py new file mode 100644 index 00000000..3d9c2394 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_goodput_avg.py @@ -0,0 +1,49 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord + + +@total_ordering +class RequestGoodputAvg(IncreasingRecord): + """ + A record avg request goodput metric + """ + + tag = "request_goodput_avg" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @staticmethod + def value_function(): + return sum + + @staticmethod + def header(aggregation_tag=False) -> str: + return "Request Goodput (requests/sec)" + + def __eq__(self, other: "RequestGoodputAvg") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "RequestGoodputAvg") -> bool: + return self.value() < other.value() + + def __add__(self, other: "RequestGoodputAvg") -> "RequestGoodputAvg": + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other: "RequestGoodputAvg") -> "RequestGoodputAvg": + return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/request_latency_avg.py b/genai-perf/genai_perf/record/types/request_latency_avg.py new file mode 100644 index 00000000..2f7a0bb5 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_avg.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyAvg(RequestLatencyBase): + """ + A record for avg request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_avg" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Avg. Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/perf_latency_base.py b/genai-perf/genai_perf/record/types/request_latency_base.py similarity index 67% rename from genai-perf/genai_perf/record/types/perf_latency_base.py rename to genai-perf/genai_perf/record/types/request_latency_base.py index b7d6f5f5..37926a1e 100644 --- a/genai-perf/genai_perf/record/types/perf_latency_base.py +++ b/genai-perf/genai_perf/record/types/request_latency_base.py @@ -15,44 +15,27 @@ from functools import total_ordering from genai_perf.record.record import DecreasingRecord +from genai_perf.types import RecordValue @total_ordering -class PerfLatencyBase(DecreasingRecord): +class RequestLatencyBase(DecreasingRecord): """ - A base class for perf_analyzer latency metric + A base class for the request latency metric """ - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ + base_tag = "request_latency" + def __init__(self, value: RecordValue, timestamp: int = 0) -> None: super().__init__(value, timestamp) - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - + def __eq__(self, other: "RequestLatencyBase") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - + def __lt__(self, other: "RequestLatencyBase") -> bool: return self.value() 
> other.value() - def __add__(self, other): + def __add__(self, other: "RequestLatencyBase") -> "RequestLatencyBase": """ Allows adding two records together to produce a brand new record. @@ -60,7 +43,7 @@ def __add__(self, other): return self.__class__(value=(self.value() + other.value())) - def __sub__(self, other): + def __sub__(self, other: "RequestLatencyBase") -> "RequestLatencyBase": """ Allows subbing two records together to produce a brand new record. diff --git a/genai-perf/genai_perf/record/types/request_latency_max.py b/genai-perf/genai_perf/record/types/request_latency_max.py new file mode 100644 index 00000000..70ec40e4 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_max.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyMaX(RequestLatencyBase): + """ + A record for max request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_max" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Max Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_latency_min.py b/genai-perf/genai_perf/record/types/request_latency_min.py new file mode 100644 index 00000000..ac84e3d8 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_min.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyMin(RequestLatencyBase): + """ + A record for min request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_min" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Min Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_latency_p25.py b/genai-perf/genai_perf/record/types/request_latency_p25.py new file mode 100644 index 00000000..cd011a65 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_p25.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyP25(RequestLatencyBase): + """ + A record for p25 request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_p25" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p25 Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_latency_p50.py b/genai-perf/genai_perf/record/types/request_latency_p50.py new file mode 100644 index 00000000..9de0c760 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_p50.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyAvg(RequestLatencyBase): + """ + A record for p50 request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_p50" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p50 Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_latency_p75.py b/genai-perf/genai_perf/record/types/request_latency_p75.py new file mode 100644 index 00000000..adb79f6f --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_p75.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyP75(RequestLatencyBase): + """ + A record for p75 request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_p75" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p75 Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_latency_p90.py b/genai-perf/genai_perf/record/types/request_latency_p90.py new file mode 100644 index 00000000..901d55b2 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_p90.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyP90(RequestLatencyBase): + """ + A record for p90 request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_p90" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p90 Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_latency_p95.py b/genai-perf/genai_perf/record/types/request_latency_p95.py new file mode 100644 index 00000000..b1ef4061 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_p95.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyP95(RequestLatencyBase): + """ + A record for p95 request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_p95" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p95 Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_latency_p99.py b/genai-perf/genai_perf/record/types/request_latency_p99.py new file mode 100644 index 00000000..41415a96 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_p99.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyP99(RequestLatencyBase): + """ + A record for p99 request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_p99" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p99 Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_latency_std.py b/genai-perf/genai_perf/record/types/request_latency_std.py new file mode 100644 index 00000000..a79e7b48 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_std.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyStd(RequestLatencyBase): + """ + A record for std request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_std" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Std. 
Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_throughput_avg.py b/genai-perf/genai_perf/record/types/request_throughput_avg.py new file mode 100644 index 00000000..15b8b453 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_throughput_avg.py @@ -0,0 +1,49 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord + + +@total_ordering +class RequestThroughputAvg(IncreasingRecord): + """ + A record avg request throughput metric + """ + + tag = "request_throughput_avg" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @staticmethod + def value_function(): + return sum + + @staticmethod + def header(aggregation_tag=False) -> str: + return "Request Throughput (requests/sec)" + + def __eq__(self, other: "RequestThroughputAvg") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "RequestThroughputAvg") -> bool: + return self.value() < other.value() + + def __add__(self, other: "RequestThroughputAvg") -> "RequestThroughputAvg": + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other: "RequestThroughputAvg") -> "RequestThroughputAvg": + return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_avg.py 
b/genai-perf/genai_perf/record/types/time_to_first_token_avg.py index 3a9b861b..8f2f095f 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_avg.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_avg.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenAvg(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for avg Time to first token metric """ - tag = "time_to_first_token_avg" + tag = TimeToFirstTokenBase.base_tag + "_avg" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "Avg Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_base.py b/genai-perf/genai_perf/record/types/time_to_first_token_base.py index f7216f3f..04f77354 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_base.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_base.py @@ -15,44 +15,27 @@ from functools import total_ordering from genai_perf.record.record import DecreasingRecord +from genai_perf.types import RecordValue @total_ordering class TimeToFirstTokenBase(DecreasingRecord): """ - A base class record for perf_analyzer time to first token metric + A base class record for the time to first token metric """ - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ + base_tag = "time_to_first_token" + def __init__(self, value: RecordValue, timestamp: int = 0) -> None: super().__init__(value, timestamp) - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - + def __eq__(self, other: "TimeToFirstTokenBase") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - + def __lt__(self, other: "TimeToFirstTokenBase") -> bool: return self.value() > other.value() - def __add__(self, other): + def __add__(self, other: "TimeToFirstTokenBase") -> "TimeToFirstTokenBase": """ Allows adding two records together to produce a brand new record. @@ -60,7 +43,7 @@ def __add__(self, other): return self.__class__(value=(self.value() + other.value())) - def __sub__(self, other): + def __sub__(self, other: "TimeToFirstTokenBase") -> "TimeToFirstTokenBase": """ Allows subbing two records together to produce a brand new record. 
diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_max.py b/genai-perf/genai_perf/record/types/time_to_first_token_max.py index eaba2b75..e4c35fea 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_max.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_max.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenMax(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for max Time to first token metric """ - tag = "time_to_first_token_max" + tag = TimeToFirstTokenBase.base_tag + "_max" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "Max Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_min.py b/genai-perf/genai_perf/record/types/time_to_first_token_min.py index 15612bee..6b42e58c 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_min.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_min.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenMin(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for min Time to first token metric """ - tag = "time_to_first_token_min" + tag = TimeToFirstTokenBase.base_tag + "_min" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "Min Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p25.py b/genai-perf/genai_perf/record/types/time_to_first_token_p25.py index cd472f67..070d3c17 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_p25.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p25.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenP25(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for p25 Time to first token metric """ - tag = "time_to_first_token_p25" + tag = TimeToFirstTokenBase.base_tag + "_p25" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p25 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p50.py b/genai-perf/genai_perf/record/types/time_to_first_token_p50.py index 68b8a5a8..cbc3eb4f 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_p50.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p50.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenP50(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for p50 Time to first token metric """ - tag = "time_to_first_token_p50" + tag = TimeToFirstTokenBase.base_tag + "_p50" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p50 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p75.py b/genai-perf/genai_perf/record/types/time_to_first_token_p75.py index 6c22469b..a55f3b26 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_p75.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p75.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenP75(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for p75 Time to first token metric """ - tag = "time_to_first_token_p75" + tag = TimeToFirstTokenBase.base_tag + "_p75" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p75 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p90.py b/genai-perf/genai_perf/record/types/time_to_first_token_p90.py index 431ce6eb..c44e2766 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_p90.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p90.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenP90(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for p90 Time to first token metric """ - tag = "time_to_first_token_p90" + tag = TimeToFirstTokenBase.base_tag + "_p90" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p90 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p95.py b/genai-perf/genai_perf/record/types/time_to_first_token_p95.py index 5b118301..74905500 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_p95.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p95.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenP95(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for p95 Time to first token metric """ - tag = "time_to_first_token_p95" + tag = TimeToFirstTokenBase.base_tag + "_p95" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p95 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p99.py b/genai-perf/genai_perf/record/types/time_to_first_token_p99.py index 68125f2e..dd5a16ed 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_p99.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p99.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenP99(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for p99 Time to first token metric """ - tag = "time_to_first_token_p99" + tag = TimeToFirstTokenBase.base_tag + "_p99" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - + def header(cls, aggregation_tag=False) -> str: return "p99 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_std.py b/genai-perf/genai_perf/record/types/time_to_first_token_std.py new file mode 100644 index 00000000..148deb89 --- /dev/null +++ b/genai-perf/genai_perf/record/types/time_to_first_token_std.py @@ -0,0 +1,33 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.time_to_first_token_base import TimeToFirstTokenBase + + +@total_ordering +class TimeToFirstTokenStd(TimeToFirstTokenBase): + """ + A record for the standard deviation (std) of the Time to first token metric + """ + + tag = TimeToFirstTokenBase.base_tag + "_std" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Std Time To First Token (ms)" diff --git a/genai-perf/genai_perf/types.py b/genai-perf/genai_perf/types.py index 578da98a..b5e1f205 100644 --- a/genai-perf/genai_perf/types.py +++ b/genai-perf/genai_perf/types.py @@ -38,6 +38,7 @@ GpuRecords: TypeAlias = Dict[GpuId, TelemetryRecords] PerfRecords: TypeAlias = Dict[str, "Record"] # type: ignore PerfMetricName: TypeAlias = str +RecordValue = Union[float, int] ########################################################################### # Constraints diff --git a/genai-perf/tests/test_llm_profile_data_parser.py b/genai-perf/tests/test_llm_profile_data_parser.py index d3f213cb..01b03b0d 100644 --- a/genai-perf/tests/test_llm_profile_data_parser.py +++ b/genai-perf/tests/test_llm_profile_data_parser.py @@ -32,6 +32,7 @@ from genai_perf.metrics import LLMMetrics from genai_perf.metrics.statistics import Statistics from genai_perf.profile_data_parser import LLMProfileDataParser +from genai_perf.record.types.request_throughput_avg import RequestThroughputAvg from genai_perf.tokenizer import DEFAULT_TOKENIZER, get_tokenizer from .test_utils import check_statistics, 
ns_to_sec @@ -216,6 +217,11 @@ def test_triton_llm_profile_data( check_llm_metrics(metrics, expected_metrics) check_statistics(statistics, expected_statistics) + # Check that Records can be created + records = statistics.create_records() + assert records is not None + assert records[0].tag == RequestThroughputAvg.tag + # check non-existing profile data with pytest.raises(KeyError): pd.get_statistics(infer_mode="concurrency", load_level="30") diff --git a/genai-perf/tests/test_model_config_measurement.py b/genai-perf/tests/test_model_config_measurement.py index 5f22b45e..ba6fd27f 100644 --- a/genai-perf/tests/test_model_config_measurement.py +++ b/genai-perf/tests/test_model_config_measurement.py @@ -21,8 +21,8 @@ ModelConfigMeasurement, ModelConfigMeasurementDefaults, ) -from genai_perf.record.types.perf_latency_p99 import PerfLatencyP99 -from genai_perf.record.types.perf_throughput import PerfThroughput +from genai_perf.record.types.request_latency_p99 import RequestLatencyP99 +from genai_perf.record.types.request_throughput_avg import RequestThroughputAvg from genai_perf.record.types.time_to_first_token_avg import TimeToFirstTokenAvg @@ -32,22 +32,22 @@ class TestModelConfigMeasurement(unittest.TestCase): ########################################################################### def setUp(self): - self.throughput_recordA = PerfThroughput(1000) - self.latency_recordA = PerfLatencyP99(20) + self.throughput_recordA = RequestThroughputAvg(1000) + self.latency_recordA = RequestLatencyP99(20) self.perf_metricsA = { - PerfThroughput.tag: self.throughput_recordA, - PerfLatencyP99.tag: self.latency_recordA, + RequestThroughputAvg.tag: self.throughput_recordA, + RequestLatencyP99.tag: self.latency_recordA, } self.mcmA = ModelConfigMeasurement(self.perf_metricsA) - self.throughput_recordB = PerfThroughput(500) - self.latency_recordB = PerfLatencyP99(10) + self.throughput_recordB = RequestThroughputAvg(500) + self.latency_recordB = RequestLatencyP99(10) self.perf_metricsB = 
{ - PerfThroughput.tag: self.throughput_recordB, - PerfLatencyP99.tag: self.latency_recordB, + RequestThroughputAvg.tag: self.throughput_recordB, + RequestLatencyP99.tag: self.latency_recordB, } self.mcmB = ModelConfigMeasurement(self.perf_metricsB) @@ -64,10 +64,10 @@ def test_basic_accessor_methods(self): """ self.assertEqual(self.mcmA.get_perf_metrics(), self.perf_metricsA) self.assertEqual( - self.mcmA.get_perf_metric(PerfLatencyP99.tag), self.latency_recordA + self.mcmA.get_perf_metric(RequestLatencyP99.tag), self.latency_recordA ) self.assertEqual( - self.mcmA.get_perf_metric_value(PerfThroughput.tag, return_value=-1), + self.mcmA.get_perf_metric_value(RequestThroughputAvg.tag, return_value=-1), self.throughput_recordA.value(), ) self.assertEqual( @@ -85,8 +85,10 @@ def test_set_metric_objective(self): self.mcmA._metric_objectives, ) - self.mcmA.set_metric_objectives({PerfThroughput.tag: 2, PerfLatencyP99.tag: 3}) - expected_mw = {PerfThroughput.tag: 2 / 5, PerfLatencyP99.tag: 3 / 5} + self.mcmA.set_metric_objectives( + {RequestThroughputAvg.tag: 2, RequestLatencyP99.tag: 3} + ) + expected_mw = {RequestThroughputAvg.tag: 2 / 5, RequestLatencyP99.tag: 3 / 5} self.assertEqual(expected_mw, self.mcmA._metric_objectives) def test_get_weighted_score(self): @@ -104,8 +106,8 @@ def test_get_weighted_score(self): # In this case we will change the objective to be latency, with mcmA = 20, mcmB = 5 # since latency is a decreasing record (lower is better), scoreB will be positive - self.mcmA.set_metric_objectives({PerfLatencyP99.tag: 1}) - self.mcmB.set_metric_objectives({PerfLatencyP99.tag: 1}) + self.mcmA.set_metric_objectives({RequestLatencyP99.tag: 1}) + self.mcmB.set_metric_objectives({RequestLatencyP99.tag: 1}) scoreA = self.mcmA.get_weighted_score(self.mcmB) scoreB = self.mcmB.get_weighted_score(self.mcmA) @@ -144,8 +146,8 @@ def test_calculate_weighted_percentage_gain(self): self.assertEqual(self.mcmA.calculate_weighted_percentage_gain(self.mcmB), 100) 
self.assertEqual(self.mcmB.calculate_weighted_percentage_gain(self.mcmA), -50) - self.mcmA.set_metric_objectives({PerfLatencyP99.tag: 1}) - self.mcmB.set_metric_objectives({PerfLatencyP99.tag: 1}) + self.mcmA.set_metric_objectives({RequestLatencyP99.tag: 1}) + self.mcmB.set_metric_objectives({RequestLatencyP99.tag: 1}) # latency: mcmA: 20, mcmB: 10 self.assertEqual(self.mcmA.calculate_weighted_percentage_gain(self.mcmB), -50) @@ -157,8 +159,12 @@ def test_calculate_weighted_percentage_gain(self): # # mcmA has 50% worse throughput, but 100% better latency # mcmB has 100% better latency, but 50% worse throughput - self.mcmA.set_metric_objectives({PerfThroughput.tag: 1, PerfLatencyP99.tag: 1}) - self.mcmB.set_metric_objectives({PerfThroughput.tag: 1, PerfLatencyP99.tag: 1}) + self.mcmA.set_metric_objectives( + {RequestThroughputAvg.tag: 1, RequestLatencyP99.tag: 1} + ) + self.mcmB.set_metric_objectives( + {RequestThroughputAvg.tag: 1, RequestLatencyP99.tag: 1} + ) self.assertEqual(self.mcmA, self.mcmB) self.assertEqual(self.mcmA.calculate_weighted_percentage_gain(self.mcmB), 25) self.assertEqual(self.mcmB.calculate_weighted_percentage_gain(self.mcmA), 25) @@ -170,13 +176,13 @@ def test_is_better_than(self): """ Test that individual metric comparison works as expected """ - self.mcmA.set_metric_objectives({PerfThroughput.tag: 1}) + self.mcmA.set_metric_objectives({RequestThroughputAvg.tag: 1}) # throughput: 1000 is better than 500 self.assertTrue(self.mcmA.is_better_than(self.mcmB)) self.assertGreater(self.mcmA, self.mcmB) - self.mcmA.set_metric_objectives({PerfLatencyP99.tag: 1}) + self.mcmA.set_metric_objectives({RequestLatencyP99.tag: 1}) # latency: 20 is worse than 10 self.assertFalse(self.mcmA.is_better_than(self.mcmB)) @@ -188,7 +194,9 @@ def test_is_better_than_combo(self): """ # throuhput: 2000 vs. 1000 (better), latency: 20 vs. 
10 (worse) # with latency bias mcmB is better - self.mcmA.set_metric_objectives({PerfThroughput.tag: 1, PerfLatencyP99.tag: 3}) + self.mcmA.set_metric_objectives( + {RequestThroughputAvg.tag: 1, RequestLatencyP99.tag: 3} + ) self.assertFalse(self.mcmA.is_better_than(self.mcmB)) diff --git a/genai-perf/tests/test_record.py b/genai-perf/tests/test_record.py index d2b398f4..baf493a0 100644 --- a/genai-perf/tests/test_record.py +++ b/genai-perf/tests/test_record.py @@ -45,13 +45,20 @@ def setUp(self): self.less_is_better_types = { record_types[t] for t in [ - "perf_latency_avg", - "perf_latency_p90", - "perf_latency_p95", - "perf_latency_p99", + "request_latency_min", + "request_latency_max", + "request_latency_avg", + "request_latency_std", + "request_latency_p25", + "request_latency_p50", + "request_latency_p75", + "request_latency_p90", + "request_latency_p95", + "request_latency_p99", "inter_token_latency_min", "inter_token_latency_max", "inter_token_latency_avg", + "inter_token_latency_std", "inter_token_latency_p25", "inter_token_latency_p50", "inter_token_latency_p75", @@ -61,36 +68,58 @@ def setUp(self): "time_to_first_token_min", "time_to_first_token_max", "time_to_first_token_avg", + "time_to_first_token_std", "time_to_first_token_p25", "time_to_first_token_p50", "time_to_first_token_p75", "time_to_first_token_p90", "time_to_first_token_p95", "time_to_first_token_p99", - "gpu_used_memory", - "cpu_used_ram", - "perf_server_compute_infer", - "perf_server_queue", - "perf_client_response_wait", - "perf_server_compute_output", - "perf_client_send_recv", - "perf_server_compute_input", "gpu_power_usage", + "energy_consumption", ] } self.more_is_better_types = { record_types[t] for t in [ - "perf_throughput", - "input_sequence_length", - "output_sequence_length", - "output_token_throughput", - "output_token_throughput_per_request", - "gpu_free_memory", + "request_throughput_avg", + "request_goodput_avg", + "output_token_throughput_avg", + 
"output_token_throughput_per_request_min", + "output_token_throughput_per_request_max", + "output_token_throughput_per_request_avg", + "output_token_throughput_per_request_std", + "output_token_throughput_per_request_p25", + "output_token_throughput_per_request_p50", + "output_token_throughput_per_request_p75", + "output_token_throughput_per_request_p90", + "output_token_throughput_per_request_p95", + "output_token_throughput_per_request_p99", + "output_sequence_length_min", + "output_sequence_length_max", + "output_sequence_length_avg", + "output_sequence_length_std", + "output_sequence_length_p25", + "output_sequence_length_p50", + "output_sequence_length_p75", + "output_sequence_length_p90", + "output_sequence_length_p95", + "output_sequence_length_p99", + "input_sequence_length_min", + "input_sequence_length_max", + "input_sequence_length_avg", + "input_sequence_length_std", + "input_sequence_length_p25", + "input_sequence_length_p50", + "input_sequence_length_p75", + "input_sequence_length_p90", + "input_sequence_length_p95", + "input_sequence_length_p99", + "gpu_power_limit", "gpu_utilization", - "cpu_available_ram", - "gpu_total_memory", + "total_gpu_memory", + "gpu_memory_used", ] } @@ -200,11 +229,11 @@ def test_value(self): Test the value method """ avg_value = RecordType.get_all_record_types()[ - "perf_latency_p99" + "request_latency_p99" ].value_function()([10, 50, 100, 40]) total_value = RecordType.get_all_record_types()[ - "perf_throughput" + "request_throughput_avg" ].value_function()([10, 50, 100, 40]) self.assertEqual(avg_value, 50) diff --git a/genai-perf/tests/test_results.py b/genai-perf/tests/test_results.py index 8136303c..9ee419d9 100644 --- a/genai-perf/tests/test_results.py +++ b/genai-perf/tests/test_results.py @@ -20,7 +20,7 @@ from genai_perf.config.run.results import Results from genai_perf.measurements.run_constraints import ModelConstraints, RunConstraints from genai_perf.record.types.gpu_power_usage import GPUPowerUsage -from 
genai_perf.record.types.perf_latency_p99 import PerfLatencyP99 +from genai_perf.record.types.request_latency_p99 import RequestLatencyP99 from tests.test_utils import create_run_config @@ -73,7 +73,7 @@ def test_objective_setting(self): # Changing the objective to latency will result in config_9 being best self._results.set_perf_metric_objectives( - {"test_model": {PerfLatencyP99.tag: 1}} + {"test_model": {RequestLatencyP99.tag: 1}} ) self.assertEqual("test_run_config_9", self._results.run_configs[0].name) diff --git a/genai-perf/tests/test_run_config_measurement.py b/genai-perf/tests/test_run_config_measurement.py index ca356617..9f88c5b7 100644 --- a/genai-perf/tests/test_run_config_measurement.py +++ b/genai-perf/tests/test_run_config_measurement.py @@ -23,8 +23,8 @@ from genai_perf.measurements.run_constraints import RunConstraints from genai_perf.record.types.gpu_power_usage import GPUPowerUsage from genai_perf.record.types.gpu_utilization import GPUUtilization -from genai_perf.record.types.perf_latency_p99 import PerfLatencyP99 -from genai_perf.record.types.perf_throughput import PerfThroughput +from genai_perf.record.types.request_latency_p99 import RequestLatencyP99 +from genai_perf.record.types.request_throughput_avg import RequestThroughputAvg class TestRunConfigMeasurement(unittest.TestCase): @@ -78,40 +78,40 @@ def _create_gpu_metrics(self) -> None: def _create_perf_metrics(self) -> None: # # Record A - self.throughput_recordA = PerfThroughput(1000) - self.latency_recordA = PerfLatencyP99(40) + self.throughput_recordA = RequestThroughputAvg(1000) + self.latency_recordA = RequestLatencyP99(40) self.perf_metricsA = { - PerfThroughput.tag: self.throughput_recordA, - PerfLatencyP99.tag: self.latency_recordA, + RequestThroughputAvg.tag: self.throughput_recordA, + RequestLatencyP99.tag: self.latency_recordA, } # # Record B - self.throughput_recordB = PerfThroughput(500) - self.latency_recordB = PerfLatencyP99(30) + self.throughput_recordB = 
RequestThroughputAvg(500) + self.latency_recordB = RequestLatencyP99(30) self.perf_metricsB = { - PerfThroughput.tag: self.throughput_recordB, - PerfLatencyP99.tag: self.latency_recordB, + RequestThroughputAvg.tag: self.throughput_recordB, + RequestLatencyP99.tag: self.latency_recordB, } # # Record MM - self.throughput_recordMM_0 = PerfThroughput(1000) - self.latency_recordMM_0 = PerfLatencyP99(20) + self.throughput_recordMM_0 = RequestThroughputAvg(1000) + self.latency_recordMM_0 = RequestLatencyP99(20) - self.throughput_recordMM_1 = PerfThroughput(2000) - self.latency_recordMM_1 = PerfLatencyP99(30) + self.throughput_recordMM_1 = RequestThroughputAvg(2000) + self.latency_recordMM_1 = RequestLatencyP99(30) self.perf_metricsMM_0 = { - PerfThroughput.tag: self.throughput_recordMM_0, - PerfLatencyP99.tag: self.latency_recordMM_0, + RequestThroughputAvg.tag: self.throughput_recordMM_0, + RequestLatencyP99.tag: self.latency_recordMM_0, } self.perf_metricsMM_1 = { - PerfThroughput.tag: self.throughput_recordMM_1, - PerfLatencyP99.tag: self.latency_recordMM_1, + RequestThroughputAvg.tag: self.throughput_recordMM_1, + RequestLatencyP99.tag: self.latency_recordMM_1, } def _create_rcmA(self) -> RunConfigMeasurement: @@ -170,12 +170,12 @@ def test_basic_accessor_methods(self): self.assertEqual(expected_all_perf_metrics_dict, rcmA.get_all_perf_metrics()) self.assertEqual(self.perf_metricsA, rcmA.get_model_perf_metrics("test_model")) self.assertEqual( - self.perf_metricsA[PerfThroughput.tag], - rcmA.get_model_perf_metric("test_model", PerfThroughput.tag), + self.perf_metricsA[RequestThroughputAvg.tag], + rcmA.get_model_perf_metric("test_model", RequestThroughputAvg.tag), ) self.assertEqual( - self.perf_metricsA[PerfThroughput.tag].value(), - rcmA.get_model_perf_metric_value("test_model", PerfThroughput.tag), + self.perf_metricsA[RequestThroughputAvg.tag].value(), + rcmA.get_model_perf_metric_value("test_model", RequestThroughputAvg.tag), ) self.assertEqual( 10, @@ -191,13 
+191,13 @@ def test_basic_accessor_methods(self): rcmMM.set_model_weighting(model_weights) expected_weighted_perf_metric_values = { - "modelMM_0": self.perf_metricsMM_0[PerfThroughput.tag].value() * 0.8, - "modelMM_1": self.perf_metricsMM_1[PerfThroughput.tag].value() * 0.2, + "modelMM_0": self.perf_metricsMM_0[RequestThroughputAvg.tag].value() * 0.8, + "modelMM_1": self.perf_metricsMM_1[RequestThroughputAvg.tag].value() * 0.2, } self.assertEqual( expected_weighted_perf_metric_values, - rcmMM.get_weighted_perf_metric_values(PerfThroughput.tag), + rcmMM.get_weighted_perf_metric_values(RequestThroughputAvg.tag), ) def test_set_gpu_metric_objectives(self): @@ -259,7 +259,7 @@ def test_is_better_than_perf_metric(self): # Changing the metric objectives to bias latency # this tips the scale in the favor of RCMB latency_bias_objectives = { - "test_model": {PerfThroughput.tag: 1, PerfLatencyP99.tag: 4} + "test_model": {RequestThroughputAvg.tag: 1, RequestLatencyP99.tag: 4} } rcmA.set_perf_metric_objectives(latency_bias_objectives) rcmB.set_perf_metric_objectives(latency_bias_objectives) @@ -350,12 +350,12 @@ def test_is_passing_perf_constraints(self): rcmA = self._create_rcmA() # RCMA's latency is 40 - model_constraints = ModelConstraints({PerfLatencyP99.tag: 50}) + model_constraints = ModelConstraints({RequestLatencyP99.tag: 50}) run_constraints = RunConstraints({"test_model": model_constraints}) rcmA.set_constraints(run_constraints) self.assertTrue(rcmA.is_passing_constraints()) - model_constraints = ModelConstraints({PerfLatencyP99.tag: 20}) + model_constraints = ModelConstraints({RequestLatencyP99.tag: 20}) run_constraints = RunConstraints({"test_model": model_constraints}) rcmA.set_constraints(run_constraints) self.assertFalse(rcmA.is_passing_constraints()) diff --git a/genai-perf/tests/test_utils.py b/genai-perf/tests/test_utils.py index 44c9e206..8038456b 100644 --- a/genai-perf/tests/test_utils.py +++ b/genai-perf/tests/test_utils.py @@ -36,10 +36,10 @@ from 
genai_perf.metrics.statistics import Statistics from genai_perf.record.types.gpu_power_usage import GPUPowerUsage from genai_perf.record.types.gpu_utilization import GPUUtilization -from genai_perf.record.types.input_sequence_length import InputSequenceLength -from genai_perf.record.types.output_sequence_length import OutputSequenceLength -from genai_perf.record.types.perf_latency_p99 import PerfLatencyP99 -from genai_perf.record.types.perf_throughput import PerfThroughput +from genai_perf.record.types.input_sequence_length_p99 import InputSequenceLengthP99 +from genai_perf.record.types.output_sequence_length_p99 import OutputSequenceLengthP99 +from genai_perf.record.types.request_latency_p99 import RequestLatencyP99 +from genai_perf.record.types.request_throughput_avg import RequestThroughputAvg from genai_perf.types import GpuId, ModelObjectiveParameters, PerfRecords @@ -71,13 +71,17 @@ def create_perf_metrics( ) -> PerfRecords: perf_metrics: PerfRecords = {} if throughput: - perf_metrics[PerfThroughput.tag] = PerfThroughput(throughput) + perf_metrics[RequestThroughputAvg.tag] = RequestThroughputAvg(throughput) if latency: - perf_metrics[PerfLatencyP99.tag] = PerfLatencyP99(latency) + perf_metrics[RequestLatencyP99.tag] = RequestLatencyP99(latency) if input_seq_length: - perf_metrics[InputSequenceLength.tag] = InputSequenceLength(input_seq_length) + perf_metrics[InputSequenceLengthP99.tag] = InputSequenceLengthP99( + input_seq_length + ) if output_seq_length: - perf_metrics[OutputSequenceLength.tag] = OutputSequenceLength(output_seq_length) + perf_metrics[OutputSequenceLengthP99.tag] = OutputSequenceLengthP99( + output_seq_length + ) return perf_metrics