Refactor and enhance code to support goodput options in both LLM and embeddings usages
triton-inference-server · Aug 13, 2024 · 5f46340 · 5f46340
1 parent 5d01561
commit 5f46340
Show file tree

Hide file tree

Showing 16 changed files with 327 additions and 170 deletions.
diff --git a/genai-perf/genai_perf/export_data/console_exporter.py b/genai-perf/genai_perf/export_data/console_exporter.py
@@ -66,8 +66,15 @@ def export(self) -> None:
         # System metrics are printed after the table
         for metric in self._metrics.system_metrics:
             line = metric.name.replace("_", " ").capitalize()
-            if metric.name == "request_goodput" and not self._args.goodput:
-                continue
+            if metric.name == "request_goodput":
+                if not self._args.goodput:
+                    continue
+                value = self._stats[metric.name]["avg"]
+                if value is None:
+                    value = "N/A"
+                    line += f" ({metric.unit}): {value}"
+                    print(line)
+                    continue
             value = self._stats[metric.name]["avg"]
             line += f" ({metric.unit}): {value:.2f}"
             print(line)

diff --git a/genai-perf/genai_perf/export_data/csv_exporter.py b/genai-perf/genai_perf/export_data/csv_exporter.py
@@ -94,8 +94,14 @@ def _write_system_metrics(self, csv_writer) -> None:
         for metric in self._metrics.system_metrics:
             metric_str = metric.name.replace("_", " ").title()
             metric_str += f" ({metric.unit})"
-            if metric.name == "request_goodput" and not self._args.goodput:
-                continue
+            if metric.name == "request_goodput":
+                if not self._args.goodput:
+                    continue
+                value = self._stats[metric.name]["avg"]
+                if value is None:
+                    value = "N/A"
+                    csv_writer.writerow([metric_str, f"{value}"])
+                    continue
             value = self._stats[metric.name]["avg"]
             csv_writer.writerow([metric_str, f"{value:.2f}"])
 

diff --git a/...f/genai_perf/goodput_reporter/__init__.py → ...genai_perf/goodput_calculator/__init__.py b/...f/genai_perf/goodput_reporter/__init__.py → ...genai_perf/goodput_calculator/__init__.py
@@ -24,5 +24,5 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from genai_perf.goodput_reporter.llm_goodput_reporter import LLMGoodputReporter
-from genai_perf.goodput_reporter.goodput_reporter import GoodputReporter
+from genai_perf.goodput_calculator.llm_goodput_calculator import LLMGoodputCalculator
+from genai_perf.goodput_calculator.goodput_calculator import GoodputCalculator
diff --git a/...perf/goodput_reporter/goodput_reporter.py → .../goodput_calculator/goodput_calculator.py b/...perf/goodput_reporter/goodput_reporter.py → .../goodput_calculator/goodput_calculator.py
@@ -28,12 +28,12 @@
 
 
 from abc import ABC, abstractmethod
-from typing import Dict, List
+from typing import Dict, List, Optional
 
 from genai_perf.metrics import Metrics
 
-class GoodputReporter(ABC):
-    """A base class to report goodput according to SLOs."""
+class GoodputCalculator(ABC):
+    """A base class to calculate goodput according to SLOs."""
 
     MS_TO_NS_CONVERSION = 1e6
 
@@ -46,46 +46,48 @@ def __init__(
         self._goodput_constraints = goodput_constraints
         self._benchmark_duration = benchmark_duration
         self._metric = metric
-        self._goodput = None
+        self._goodput = "N/A"
 
-    def report(self) -> None:
-        """Template method to report goodput. Subclasses should not modify this method.
+    def compute(self) -> None:
         """
-        self.set_valid_slos()
-        self.combine_requests_metric_values()
-        self.count_good_reqs()
-        self.compute_goodput()
+        Compute the goodput result.
+
+        The GoodputCalculator class sets valid SLOs from users' input, aggregates
+        request metric values, counts the number of good requests, and calculates
+        the final goodput.
+        """
+        self._set_valid_slos()
+        self._combine_requests_metric_values()
+        good_count = self._count_good_reqs()
+        self._compute_goodput(good_count)
 
     @abstractmethod
-    def set_valid_slos(self) -> None:
-        """Check user's Service Level Objectives (SLOs) inputs. 
+    def _set_valid_slos(self) -> None:
+        """
+        Check users' Service Level Objectives (SLOs) inputs. 
         Set the valid ones while logging the invalid ones. 
-        To be implemented by subclasses.
         """
         pass
 
     @abstractmethod
-    def combine_requests_metric_values(self) -> None:
-        """Combine metric values at per request level.
-        Only the metrics from valid SLOs.  
-        To be implemented by subclasses.
+    def _combine_requests_metric_values(self) -> None:
+        """
+        Combine values from the metrics that match with the valid SLOs at a
+        per request level.  
         """
         pass
 
     @abstractmethod
-    def count_good_reqs(self) -> None:
-        """Count the number of good requests according to SLOs. 
-        To be implemented by subclasses.
-        """
+    def _count_good_reqs(self) -> Optional[int]:
+        """Count the number of good requests according to SLOs."""
         pass
 
     @abstractmethod
-    def compute_goodput(self) -> None:
-        """Compute the goodput. To be implemented by subclasses."""
+    def _compute_goodput(self, good_count) -> None:
+        """Compute the goodput."""
         pass
 
     @property
     def goodput(self) -> List[float]:
         return self._goodput
-
-
+
diff --git a/genai-perf/genai_perf/goodput_calculator/llm_goodput_calculator.py b/genai-perf/genai_perf/goodput_calculator/llm_goodput_calculator.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from typing import Dict, Optional
+import genai_perf.logging as logging
+from genai_perf.goodput_calculator.goodput_calculator import GoodputCalculator
+from genai_perf.metrics.llm_metrics import LLMMetrics
+
+logger = logging.getLogger(__name__)
+
+class LLMGoodputCalculator(GoodputCalculator):
+    """
+    A subclass to calculate goodput for LLMs according to LLM-related SLOs.
+    """
+
+    def __init__(self,
+                goodput_constraints: Dict[str, float],
+                metric: LLMMetrics,
+                benchmark_duration: float,
+    ) -> None:
+        super().__init__(goodput_constraints, metric, benchmark_duration)
+        # (TMA-1975 related) The order is hardcoded as below due to the hardcoded order
+        # in the LLMMetrics class. We would eventually want to impose some consistent
+        # order for time-related metrics and throughput-related metrics.
+        self._valid_time_related_names = [
+            item.name for item in metric.request_time_metrics
+        ]
+        self._valid_throughput_related_names = [
+            item.name for item in metric.request_throughput_metrics
+        ]
+        self._valid_metric_names = (
+            self._valid_time_related_names + self._valid_throughput_related_names
+        )
+        self._has_time_target = False
+        self._has_throughput_target = False
+
+    def _set_valid_slos(self) -> None:
+        """
+        Check users' Service Level Objectives (SLOs) inputs. 
+        Set the valid ones while logging the invalid ones. 
+        """
+        invalid_slos = []
+        self._valid_time_related_slos = {}
+        self._valid_throughput_related_slos = {}
+        for slo_name, slo_value in self._goodput_constraints.items():
+            try:
+                base_name = self._metric.get_base_name(slo_name)
+                if base_name in self._valid_metric_names:
+                    if base_name in self._valid_time_related_names:
+                        self._valid_time_related_slos[slo_name] = (
+                            slo_value * self.MS_TO_NS_CONVERSION
+                        )
+                    elif base_name in self._valid_throughput_related_names:
+                        self._valid_throughput_related_slos[slo_name] = (
+                            slo_value 
+                        )
+            except KeyError:            
+                invalid_slos.append(slo_name)
+        if self._valid_time_related_slos:
+            self._has_time_target = True
+        if self._valid_throughput_related_slos:
+            self._has_throughput_target = True 
+        if invalid_slos:
+            valid_slos_list = ', '.join(self._valid_metric_names)
+            logger.info(f"Invalid SLOs found: {', '.join(invalid_slos)}. "
+                        f"The goodput will be N/A. "
+                        f"Valid SLOs are: {valid_slos_list} in plural forms.")
+            self._goodput = None
+
+    def _combine_requests_metric_values(self) -> None:
+        """
+        Combine values from the metrics that match with the valid SLOs at a
+        per request level.  
+        """
+        if self.goodput is None:
+            return
+
+        if self._has_time_target:
+            requests_time_metric_values = [
+                self._metric.data[key] for key in self._valid_time_related_slos
+            ]
+            self._combined_requests_time_metric_values = list(
+                zip(*requests_time_metric_values)
+            )
+
+        if self._has_throughput_target:
+            requests_throughput_metric_values = [
+                self._metric.data[key] for key in self._valid_throughput_related_slos
+            ] 
+            self._combined_requests_throughput_metric_values = list(
+                zip(*requests_throughput_metric_values)
+            )
+
+    def _count_good_reqs(self) -> Optional[int]:
+        """Count the number of good requests according to SLOs."""
+        if self.goodput is None:
+            return self.goodput        
+        target_time_metric_values = []
+        target_throughput_metric_values = []
+        if self._has_time_target:
+            num_of_requests = len(self._combined_requests_time_metric_values)
+            target_time_metric_values = list(self._valid_time_related_slos.values())
+        if self._has_throughput_target:
+            num_of_requests = len(self._combined_requests_throughput_metric_values)
+            target_throughput_metric_values = list(
+                self._valid_throughput_related_slos.values()
+            )                        
+
+        good_req_count = 0
+        for idx in range(num_of_requests):
+            is_good_request = True
+            request_time_metric_values = []
+            request_throughput_metric_values = []
+            if self._has_time_target:
+                request_time_metric_values = (
+                    self._combined_requests_time_metric_values[idx]
+                )
+            if self._has_throughput_target:
+                request_throughput_metric_values = (
+                    self._combined_requests_throughput_metric_values[idx]
+                )
+            for val, slo in zip(request_time_metric_values, target_time_metric_values):
+                if val > slo:
+                    is_good_request = False
+                    break
+            else:
+                for val, slo in zip(
+                    request_throughput_metric_values, target_throughput_metric_values
+                ):
+                    if val < slo:
+                        is_good_request = False
+                        break
+
+            if is_good_request:
+                good_req_count += 1
+
+        return good_req_count
+
+    def _compute_goodput(self, good_count) -> None:
+        """Compute the goodput."""
+        if self.goodput is None:
+            return
+        else:
+            self._goodput = [good_count / self._benchmark_duration]
diff --git a/genai-perf/genai_perf/goodput_reporter/llm_goodput_reporter.py b/genai-perf/genai_perf/goodput_reporter/llm_goodput_reporter.py