diff --git a/genai-perf/genai_perf/export_data/console_exporter.py b/genai-perf/genai_perf/export_data/console_exporter.py index dd2f4f57..a7facc69 100644 --- a/genai-perf/genai_perf/export_data/console_exporter.py +++ b/genai-perf/genai_perf/export_data/console_exporter.py @@ -66,8 +66,15 @@ def export(self) -> None: # System metrics are printed after the table for metric in self._metrics.system_metrics: line = metric.name.replace("_", " ").capitalize() - if metric.name == "request_goodput" and not self._args.goodput: - continue + if metric.name == "request_goodput": + if not self._args.goodput: + continue + value = self._stats[metric.name]["avg"] + if value is None: + value = "N/A" + line += f" ({metric.unit}): {value}" + print(line) + continue value = self._stats[metric.name]["avg"] line += f" ({metric.unit}): {value:.2f}" print(line) diff --git a/genai-perf/genai_perf/export_data/csv_exporter.py b/genai-perf/genai_perf/export_data/csv_exporter.py index 10ab7911..1a4ce9df 100644 --- a/genai-perf/genai_perf/export_data/csv_exporter.py +++ b/genai-perf/genai_perf/export_data/csv_exporter.py @@ -94,8 +94,14 @@ def _write_system_metrics(self, csv_writer) -> None: for metric in self._metrics.system_metrics: metric_str = metric.name.replace("_", " ").title() metric_str += f" ({metric.unit})" - if metric.name == "request_goodput" and not self._args.goodput: - continue + if metric.name == "request_goodput": + if not self._args.goodput: + continue + value = self._stats[metric.name]["avg"] + if value is None: + value = "N/A" + csv_writer.writerow([metric_str, f"{value}"]) + continue value = self._stats[metric.name]["avg"] csv_writer.writerow([metric_str, f"{value:.2f}"]) diff --git a/genai-perf/genai_perf/goodput_reporter/__init__.py b/genai-perf/genai_perf/goodput_calculator/__init__.py similarity index 90% rename from genai-perf/genai_perf/goodput_reporter/__init__.py rename to genai-perf/genai_perf/goodput_calculator/__init__.py index 49bbec58..d16be4f5 100644 --- a/genai-perf/genai_perf/goodput_reporter/__init__.py +++ b/genai-perf/genai_perf/goodput_calculator/__init__.py @@ -24,5 +24,5 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from genai_perf.goodput_reporter.llm_goodput_reporter import LLMGoodputReporter -from genai_perf.goodput_reporter.goodput_reporter import GoodputReporter +from genai_perf.goodput_calculator.llm_goodput_calculator import LLMGoodputCalculator +from genai_perf.goodput_calculator.goodput_calculator import GoodputCalculator diff --git a/genai-perf/genai_perf/goodput_reporter/goodput_reporter.py b/genai-perf/genai_perf/goodput_calculator/goodput_calculator.py similarity index 68% rename from genai-perf/genai_perf/goodput_reporter/goodput_reporter.py rename to genai-perf/genai_perf/goodput_calculator/goodput_calculator.py index 8a96de32..223a27d5 100644 --- a/genai-perf/genai_perf/goodput_reporter/goodput_reporter.py +++ b/genai-perf/genai_perf/goodput_calculator/goodput_calculator.py @@ -28,12 +28,12 @@ from abc import ABC, abstractmethod -from typing import Dict, List +from typing import Dict, List, Optional from genai_perf.metrics import Metrics -class GoodputReporter(ABC): - """A base class to report goodput according to SLOs.""" +class GoodputCalculator(ABC): + """A base class to calculate goodput according to SLOs.""" MS_TO_NS_CONVERSION = 1e6 @@ -46,46 +46,48 @@ def __init__( self._goodput_constraints = goodput_constraints self._benchmark_duration = benchmark_duration self._metric = metric - self._goodput = None + self._goodput = "N/A" - def report(self) -> None: - """Template method to report goodput. Subclasses should not modify this method. + def compute(self) -> None: """ - self.set_valid_slos() - self.combine_requests_metric_values() - self.count_good_reqs() - self.compute_goodput() + Compute the goodput result. + + The GoodputCalculator class sets valid SLOs from users' input, aggregates + request metric values, counts the number of good requests, and calculates + the final goodput. + """ + self._set_valid_slos() + self._combine_requests_metric_values() + good_count = self._count_good_reqs() + self._compute_goodput(good_count) @abstractmethod - def set_valid_slos(self) -> None: - """Check user's Service Level Objectives (SLOs) inputs. + def _set_valid_slos(self) -> None: + """ + Check users' Service Level Objectives (SLOs) inputs. Set the valid ones while logging the invalid ones. - To be implemented by subclasses. """ pass @abstractmethod - def combine_requests_metric_values(self) -> None: - """Combine metric values at per request level. - Only the metrics from valid SLOs. - To be implemented by subclasses. + def _combine_requests_metric_values(self) -> None: + """ + Combine values from the metrics that match with the valid SLOs at a + per request level. """ pass @abstractmethod - def count_good_reqs(self) -> None: - """Count the number of good requests according to SLOs. - To be implemented by subclasses. - """ + def _count_good_reqs(self) -> Optional[int]: + """Count the number of good requests according to SLOs.""" pass @abstractmethod - def compute_goodput(self) -> None: - """Compute the goodput. To be implemented by subclasses.""" + def _compute_goodput(self, good_count) -> None: + """Compute the goodput.""" pass @property def goodput(self) -> List[float]: return self._goodput - - + \ No newline at end of file diff --git a/genai-perf/genai_perf/goodput_calculator/llm_goodput_calculator.py b/genai-perf/genai_perf/goodput_calculator/llm_goodput_calculator.py new file mode 100644 index 00000000..4b65da14 --- /dev/null +++ b/genai-perf/genai_perf/goodput_calculator/llm_goodput_calculator.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from typing import Dict, Optional +import genai_perf.logging as logging +from genai_perf.goodput_calculator.goodput_calculator import GoodputCalculator +from genai_perf.metrics.llm_metrics import LLMMetrics + +logger = logging.getLogger(__name__) + +class LLMGoodputCalculator(GoodputCalculator): + """ + A subclass to calculate goodput for LLMs according to LLM-related SLOs. + """ + + def __init__(self, + goodput_constraints: Dict[str, float], + metric: LLMMetrics, + benchmark_duration: float, + ) -> None: + super().__init__(goodput_constraints, metric, benchmark_duration) + # (TMA-1975 related) The order is hardcoded as below due to the hardcoded order + # in LLMMetirc class. We would eventually want to impose some consistent order + # for time-related metrics and throughput related metrics. + self._valid_time_related_names = [ + item.name for item in metric.request_time_metrics + ] + self._valid_throughput_related_names = [ + item.name for item in metric.request_throughput_metrics + ] + self._valid_metric_names = ( + self._valid_time_related_names + self._valid_throughput_related_names + ) + self._has_time_target = False + self._has_throughput_target = False + + def _set_valid_slos(self) -> None: + """ + Check users' Service Level Objectives (SLOs) inputs. + Set the valid ones while logging the invalid ones. + """ + invalid_slos = [] + self._valid_time_related_slos = {} + self._valid_throughput_related_slos = {} + for slo_name, slo_value in self._goodput_constraints.items(): + try: + base_name = self._metric.get_base_name(slo_name) + if base_name in self._valid_metric_names: + if base_name in self._valid_time_related_names: + self._valid_time_related_slos[slo_name] = ( + slo_value * self.MS_TO_NS_CONVERSION + ) + elif base_name in self._valid_throughput_related_names: + self._valid_throughput_related_slos[slo_name] = ( + slo_value + ) + except KeyError: + invalid_slos.append(slo_name) + if self._valid_time_related_slos: + self._has_time_target = True + if self._valid_throughput_related_slos: + self._has_throughput_target = True + if invalid_slos: + valid_slos_list = ', '.join(self._valid_metric_names) + logger.info(f"Invalid SLOs found: {', '.join(invalid_slos)}. " + f"The goodput will be N/A. " + f"Valid SLOs are: {valid_slos_list} in plural forms.") + self._goodput = None + + def _combine_requests_metric_values(self) -> None: + """ + Combine values from the metrics that match with the valid SLOs at a + per request level. + """ + if self.goodput is None: + return + + if self._has_time_target: + requests_time_metric_values = [ + self._metric.data[key] for key in self._valid_time_related_slos + ] + self._combined_requests_time_metric_values = list( + zip(*requests_time_metric_values) + ) + + if self._has_throughput_target: + requests_throughput_metric_values = [ + self._metric.data[key] for key in self._valid_throughput_related_slos + ] + self._combined_requests_throughput_metric_values = list( + zip(*requests_throughput_metric_values) + ) + + def _count_good_reqs(self) -> Optional[int]: + """Count the number of good requests according to SLOs.""" + if self.goodput is None: + return self.goodput + target_time_metric_values = [] + target_throughput_metric_values = [] + if self._has_time_target: + num_of_requests = len(self._combined_requests_time_metric_values) + target_time_metric_values = list(self._valid_time_related_slos.values()) + if self._has_throughput_target: + num_of_requests = len(self._combined_requests_throughput_metric_values) + target_throughput_metric_values = list( + self._valid_throughput_related_slos.values() + ) + + good_req_count = 0 + for idx in range(num_of_requests): + is_good_request = True + request_time_metric_values = [] + request_throughput_metric_values = [] + if self._has_time_target: + request_time_metric_values = ( + self._combined_requests_time_metric_values[idx] + ) + if self._has_throughput_target: + request_throughput_metric_values = ( + self._combined_requests_throughput_metric_values[idx] + ) + for val, slo in zip(request_time_metric_values, target_time_metric_values): + if val > slo: + is_good_request = False + break + else: + for val, slo in zip( + request_throughput_metric_values, target_throughput_metric_values + ): + if val < slo: + is_good_request = False + break + + if is_good_request: + good_req_count += 1 + + return good_req_count + + def _compute_goodput(self, good_count) -> None: + """Compute the goodput.""" + if self.goodput is None: + return + else: + self._goodput = [good_count / self._benchmark_duration] diff --git a/genai-perf/genai_perf/goodput_reporter/llm_goodput_reporter.py b/genai-perf/genai_perf/goodput_reporter/llm_goodput_reporter.py deleted file mode 100644 index ff27dac7..00000000 --- a/genai-perf/genai_perf/goodput_reporter/llm_goodput_reporter.py +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from typing import Dict -from genai_perf.goodput_reporter.goodput_reporter import GoodputReporter -from genai_perf.metrics.llm_metrics import LLMMetrics - - -class LLMGoodputReporter(GoodputReporter): - """A subclass to report goodput for language models.""" - - def __init__(self, - goodput_constraints: Dict[str, float], - metric: LLMMetrics, - benchmark_duration: float, - ) -> None: - super().__init__(goodput_constraints, metric, benchmark_duration) - - - def set_valid_slos(self) -> None: - """Check user's Service Level Objectives (SLOs) inputs. - Set the valid ones or raise error for the invalid ones. - """ - invalid_slos = [] - self._valid_slos = {} - valid_names = [metric.name for metric in self._metric.request_metrics] - - for slo_name, slo_value in self._goodput_constraints.items(): - if self._metric.get_base_name(slo_name) not in valid_names: - invalid_slos.append(slo_name) - else: - self._valid_slos[slo_name] = slo_value * self.MS_TO_NS_CONVERSION - if invalid_slos: - raise ValueError(f"Invalid SLOs found: {', '.join(invalid_slos)}, " - "Make sure these are supported request metrics.") - - def combine_requests_metric_values(self) -> None: - """Combine metric values at per request level. - Only the metrics from valid SLOs. - """ - metric_data = self._metric.data - requests_metric_values = [metric_data[key] for key in self._valid_slos] - self._combined_requests_metric_values = list(zip(*requests_metric_values)) - - def count_good_reqs(self) -> None: - """Count the number of good requests according to SLOs.""" - target_metric_values = list(self._valid_slos.values()) - requests_metric_values = self._combined_requests_metric_values - good_req_count = 0 - - for request_metric_values in requests_metric_values: - if all(val < slo - for val, slo in zip(request_metric_values, target_metric_values) - ): - good_req_count += 1 - self._good_req_count = good_req_count - - def compute_goodput(self) -> None: - """Compute the goodput.""" - self._goodput = [self._good_req_count / self._benchmark_duration] - \ No newline at end of file diff --git a/genai-perf/genai_perf/logging.py b/genai-perf/genai_perf/logging.py index f5cab490..1bdae284 100644 --- a/genai-perf/genai_perf/logging.py +++ b/genai-perf/genai_perf/logging.py @@ -90,6 +90,11 @@ def init_logging() -> None: "level": "DEBUG", "propagate": False, }, + "genai_perf.goodput_calculator.llm_goodput_calculator": { + "handlers": ["console"], + "level": "DEBUG", + "propagate": False, + }, }, } logging.config.dictConfig(LOGGING_CONFIG) diff --git a/genai-perf/genai_perf/main.py b/genai-perf/genai_perf/main.py index 66a3477e..73035335 100755 --- a/genai-perf/genai_perf/main.py +++ b/genai-perf/genai_perf/main.py @@ -94,7 +94,10 @@ def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None: def calculate_metrics(args: Namespace, tokenizer: Tokenizer) -> ProfileDataParser: if args.endpoint_type in ["embeddings", "rankings"]: - return ProfileDataParser(args.profile_export_file) + return ProfileDataParser( + args.profile_export_file, + goodput_constraints=args.goodput, + ) else: return LLMProfileDataParser( filename=args.profile_export_file, diff --git a/genai-perf/genai_perf/metrics/llm_metrics.py b/genai-perf/genai_perf/metrics/llm_metrics.py index 0e35bda6..357510e7 100755 --- a/genai-perf/genai_perf/metrics/llm_metrics.py +++ b/genai-perf/genai_perf/metrics/llm_metrics.py @@ -26,7 +26,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from typing import List +from typing import List, Union from genai_perf.metrics.metrics import MetricMetadata, Metrics @@ -34,10 +34,16 @@ class LLMMetrics(Metrics): """A simple dataclass that holds core LLM performance metrics.""" - LLM_REQUEST_METRICS = [ + LLM_REQUEST_TIME_METRICS = [ MetricMetadata("time_to_first_token", "ms"), MetricMetadata("inter_token_latency", "ms"), + ] + + LLM_REQUEST_THROUGHPUT_METRICS = [ MetricMetadata("output_token_throughput_per_request", "tokens/sec"), + ] + + LLM_REQUEST_METRICS = LLM_REQUEST_TIME_METRICS + LLM_REQUEST_THROUGHPUT_METRICS + [ MetricMetadata("output_sequence_length", "tokens"), MetricMetadata("input_sequence_length", "tokens"), ] @@ -45,7 +51,6 @@ class LLMMetrics(Metrics): LLM_SYSTEM_METRICS = [ # (TMA-1977) Make the unit consistent with statistics dict (e.g. tokens/sec) MetricMetadata("output_token_throughput", "per sec"), - MetricMetadata("request_goodput", "per sec"), ] def __init__( @@ -59,17 +64,16 @@ def __init__( output_sequence_lengths: List[int] = [], input_sequence_lengths: List[int] = [], chunked_inter_token_latencies: List[List[int]] = [[]], - request_goodputs: List[float] = [], + request_goodputs: Union[List[float], None] = [], ) -> None: - super().__init__(request_throughputs, request_latencies) + super().__init__(request_throughputs, request_latencies, request_goodputs) self.time_to_first_tokens = time_to_first_tokens self.inter_token_latencies = inter_token_latencies self.output_token_throughputs = output_token_throughputs self.output_token_throughputs_per_request = output_token_throughputs_per_request self.output_sequence_lengths = output_sequence_lengths self.input_sequence_lengths = input_sequence_lengths - self.request_goodputs = request_goodputs - + # Keeping chunked ITL (old) as a WAR to preserve visualization. # Excluded from data. self._chunked_inter_token_latencies = chunked_inter_token_latencies @@ -83,7 +87,6 @@ def __init__( ) self._base_names["output_sequence_lengths"] = "output_sequence_length" self._base_names["input_sequence_lengths"] = "input_sequence_length" - self._base_names["request_goodputs"] = "request_goodput" @property def request_metrics(self) -> List[MetricMetadata]: @@ -109,6 +112,14 @@ def system_metrics(self) -> List[MetricMetadata]: # base metrics first and then task specific metrics. Uncomment the below # line to enable this order: # return base_metrics + self.LLM_SYSTEM_METRICS - # Rightnow the goodput will be printed out before throughput if there is - # goodput. return self.LLM_SYSTEM_METRICS + base_metrics + + @property + def request_time_metrics(self) -> List[MetricMetadata]: + base_metrics = super().request_time_metrics + return self.LLM_REQUEST_TIME_METRICS + base_metrics + + @property + def request_throughput_metrics(self) -> List[MetricMetadata]: + base_metrics = super().request_throughput_metrics + return self.LLM_REQUEST_THROUGHPUT_METRICS + base_metrics diff --git a/genai-perf/genai_perf/metrics/metrics.py b/genai-perf/genai_perf/metrics/metrics.py index 7e047094..9a5b1e9e 100755 --- a/genai-perf/genai_perf/metrics/metrics.py +++ b/genai-perf/genai_perf/metrics/metrics.py @@ -27,7 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from dataclasses import dataclass -from typing import List +from typing import List, Union @dataclass @@ -39,25 +39,33 @@ class MetricMetadata: class Metrics: """A base class that contains common request level metrics.""" - REQUEST_METRICS = [ + REQUEST_TIME_METRICS = [ MetricMetadata("request_latency", "ms"), ] + REQUEST_THROUGHPUT_METRICS = [] + + REQUEST_METRICS = REQUEST_TIME_METRICS + REQUEST_THROUGHPUT_METRICS + SYSTEM_METRICS = [ # (TMA-1977) Make the unit consistent with statistics dict (e.g. tokens/sec) MetricMetadata("request_throughput", "per sec"), + MetricMetadata("request_goodput", "per sec"), ] def __init__( self, request_throughputs: List[float] = [], request_latencies: List[int] = [], + request_goodputs: Union[List[float], None] = [], ) -> None: self.request_throughputs = request_throughputs self.request_latencies = request_latencies + self.request_goodputs = request_goodputs self._base_names = { "request_throughputs": "request_throughput", "request_latencies": "request_latency", + "request_goodputs": "request_goodput", } def __repr__(self): @@ -75,6 +83,14 @@ def request_metrics(self) -> List[MetricMetadata]: def system_metrics(self) -> List[MetricMetadata]: return self.SYSTEM_METRICS + @property + def request_time_metrics(self) -> List[MetricMetadata]: + return self.REQUEST_TIME_METRICS + + @property + def request_throughput_metrics(self) -> List[MetricMetadata]: + return self.REQUEST_THROUGHPUT_METRICS + @property def data(self) -> dict: """Returns all the metrics.""" diff --git a/genai-perf/genai_perf/metrics/statistics.py b/genai-perf/genai_perf/metrics/statistics.py index 8ba37a8a..c5d429d3 100755 --- a/genai-perf/genai_perf/metrics/statistics.py +++ b/genai-perf/genai_perf/metrics/statistics.py @@ -28,7 +28,7 @@ from collections import defaultdict from pathlib import Path -from typing import Dict, List, Union +from typing import Dict, List, Optional, Union import numpy as np import pandas as pd @@ -69,20 +69,29 @@ def __init__(self, metrics: Metrics): self._calculate_minmax(data, attr) self._calculate_std(data, attr) - def _should_skip(self, data: List[Union[int, float]], attr: str) -> bool: + def _should_skip(self, data: Optional[List[Union[int, float]]], attr: str) -> bool: """Checks if some metrics should be skipped.""" # No data points - if len(data) == 0: + if data is None: + return False + elif len(data) == 0: return True # Skip ITL when non-streaming (all zero) elif attr == "inter_token_latencies" and sum(data) == 0: return True return False - def _calculate_mean(self, data: List[Union[int, float]], attr: str) -> None: - avg = np.mean(data) - setattr(self, "avg_" + attr, avg) - self._stats_dict[attr]["avg"] = float(avg) + def _calculate_mean( + self, data: Optional[List[Union[int, float]]], attr: str + ) -> None: + if data is None: + avg = None + setattr(self, "avg_" + attr, avg) + self._stats_dict[attr]["avg"] = avg + else: + avg = np.mean(data) + setattr(self, "avg_" + attr, avg) + self._stats_dict[attr]["avg"] = float(avg) def _calculate_percentiles(self, data: List[Union[int, float]], attr: str) -> None: p25, p50, p75 = np.percentile(data, [25, 50, 75]) @@ -129,9 +138,7 @@ def _scale(self, metric: float, factor: float = 1 / 1e6) -> float: def _add_units(self, key) -> None: if self._is_time_metric(key): self._stats_dict[key]["unit"] = "ms" - elif key == "request_throughput": - self._stats_dict[key]["unit"] = "requests/sec" - elif key == "request_goodput": + elif key == "request_throughput" or key == "request_goodput": self._stats_dict[key]["unit"] = "requests/sec" elif key.startswith("output_token_throughput"): self._stats_dict[key]["unit"] = "tokens/sec" diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py index 4d64b5bd..8ce519f5 100644 --- a/genai-perf/genai_perf/parser.py +++ b/genai-perf/genai_perf/parser.py @@ -248,23 +248,14 @@ def _check_goodput_args(args): """ Parse and check goodput args """ - ''' - if args.goodput: - args.goodput = parse_goodput(args.goodput) - if 'ttft' not in args.goodput and 'itl' not in args.goodput: - raise argparse.ArgumentTypeError( - f"Invalid goodput constraints format: {args.goodput}. " - "Expected format is 'ttft:x itl:y', where x and y are numbers in milliseconds." - ) - if 'ttft' not in args.goodput: - args.goodput['ttft'] = 1e9 - if 'itl' not in args.goodput: - args.goodput['itl'] = 1e9 - if args.goodput['ttft'] < 0 or args.goodput['itl'] < 0: - raise ValueError("Goodput constraint values must be non-negative.") - ''' if args.goodput: args.goodput = parse_goodput(args.goodput) + for target_metric, target_val in args.goodput.items(): + if target_val < 0: + raise ValueError( + f"Invalid value found, {target_metric}: {target_val}. " + f"The SLO value should be non-negative. " + ) return args def _set_artifact_paths(args: argparse.Namespace) -> argparse.Namespace: @@ -315,8 +306,10 @@ def parse_goodput(values): constraints[target_metric] = float(target_val) except ValueError: raise argparse.ArgumentTypeError( - f"Invalid goodput constraints format: {values}. " - "Expected format is 'ttft:x itl:y', where x and y are numbers in milliseconds." + f"Invalid format for goodput constraints: {values}. " + f"The expected format is 'key:value' pairs, where the key should be a " + f"valid service level objective name and the value should be a number " + f"representing either milliseconds or a throughput value per second." ) return constraints @@ -693,8 +686,12 @@ def _add_goodput_args(parser): "-g", nargs='+', required=False, - help="The goodput constraints are in the format of 'ttft:x itl:y', " - "where x and y are numbers in milliseconds." + help="An option to provide Service Level Objectives to compute goodput. " + "Specify goodput constraints as 'key:value' pairs, where the key is a " + "valid Service Level Objective name, and the value is a number representing " + "either milliseconds or a throughput value per second. For example, " + "'request_latencies:300' or 'output_token_throughputs_per_request:600'. " + "Multiple key:value pairs can be provided, separated by spaces. " ) def get_extra_inputs_as_dict(args: argparse.Namespace) -> dict: diff --git a/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py b/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py index fa617955..cdeeab0c 100755 --- a/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py +++ b/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py @@ -31,7 +31,7 @@ from pathlib import Path from typing import Dict, List, Tuple -from genai_perf.goodput_reporter.llm_goodput_reporter import LLMGoodputReporter +from genai_perf.goodput_calculator.llm_goodput_calculator import LLMGoodputCalculator from genai_perf.metrics import LLMMetrics, Metrics from genai_perf.profile_data_parser.profile_data_parser import ( ProfileDataParser, @@ -74,7 +74,7 @@ def __init__( ) -> None: self._tokenizer = tokenizer self._goodput_constraints = goodput_constraints - super().__init__(filename) + super().__init__(filename, goodput_constraints) def _parse_requests(self, requests: dict) -> Metrics: """Parse each requests in profile export data to extract key metrics.""" @@ -166,14 +166,14 @@ def _parse_requests(self, requests: dict) -> Metrics: # request goodput if self._goodput_constraints: - llm_goodput_reporter = LLMGoodputReporter( + llm_goodput_calculator = LLMGoodputCalculator( self._goodput_constraints, llm_metric, benchmark_duration, ) - llm_goodput_reporter.report() - llm_metric.request_goodputs = llm_goodput_reporter.goodput + llm_goodput_calculator.compute() + llm_metric.request_goodputs = llm_goodput_calculator.goodput return llm_metric diff --git a/genai-perf/genai_perf/profile_data_parser/profile_data_parser.py b/genai-perf/genai_perf/profile_data_parser/profile_data_parser.py index 74eb48a2..beb4edad 100755 --- a/genai-perf/genai_perf/profile_data_parser/profile_data_parser.py +++ b/genai-perf/genai_perf/profile_data_parser/profile_data_parser.py @@ -28,8 +28,9 @@ from enum import Enum, auto from pathlib import Path -from typing import List, Tuple +from typing import Dict, List, Tuple +from genai_perf.goodput_calculator.llm_goodput_calculator import LLMGoodputCalculator from genai_perf.metrics import Metrics, Statistics from genai_perf.utils import load_json @@ -49,7 +50,12 @@ class ProfileDataParser: extract core metrics and calculate various performance statistics. """ - def __init__(self, filename: Path) -> None: + def __init__( + self, + filename: Path, + goodput_constraints: Dict[str, float] = {}, + ) -> None: + self._goodput_constraints = goodput_constraints data = load_json(filename) self._get_profile_metadata(data) self._parse_profile_data(data) @@ -136,11 +142,23 @@ def _parse_requests(self, requests: dict) -> Metrics: benchmark_duration = (max_res_timestamp - min_req_timestamp) / 1e9 # to seconds request_throughputs = [len(requests) / benchmark_duration] - return Metrics( + metric = Metrics( request_throughputs, request_latencies, ) + # request goodput + if self._goodput_constraints: + llm_goodput_calculator = LLMGoodputCalculator( + self._goodput_constraints, + metric, + benchmark_duration, + ) + + llm_goodput_calculator.compute() + metric.request_goodputs = llm_goodput_calculator.goodput + return metric + def get_statistics(self, infer_mode: str, load_level: str) -> Statistics: """Return profile statistics if it exists.""" if (infer_mode, load_level) not in self._profile_results: diff --git a/genai-perf/tests/test_llm_metrics.py b/genai-perf/tests/test_llm_metrics.py index 68de81b2..d2f93a0d 100644 --- a/genai-perf/tests/test_llm_metrics.py +++ b/genai-perf/tests/test_llm_metrics.py @@ -74,9 +74,9 @@ def test_llm_metric_system_metrics(self) -> None: assert len(sys_metrics) == 3 assert sys_metrics[0].name == "output_token_throughput" assert sys_metrics[0].unit == "per sec" - assert sys_metrics[1].name == "request_goodput" + assert sys_metrics[1].name == "request_throughput" assert sys_metrics[1].unit == "per sec" - assert sys_metrics[2].name == "request_throughput" + assert sys_metrics[2].name == "request_goodput" assert sys_metrics[2].unit == "per sec" def test_llm_metrics_get_base_name(self) -> None: diff --git a/genai-perf/tests/test_metrics.py b/genai-perf/tests/test_metrics.py index 2af489fc..6d310381 100644 --- a/genai-perf/tests/test_metrics.py +++ b/genai-perf/tests/test_metrics.py @@ -48,9 +48,11 @@ def test_metric_system_metrics(self) -> None: request_latencies=[3, 44], ) sys_metrics = m.system_metrics - assert len(sys_metrics) == 1 + assert len(sys_metrics) == 2 assert sys_metrics[0].name == "request_throughput" assert sys_metrics[0].unit == "per sec" + assert sys_metrics[1].name == "request_goodput" + assert sys_metrics[1].unit == "per sec" def test_metrics_get_base_name(self) -> None: """Test get_base_name method in Metrics class."""