Skip to content

Commit

Permalink
Switch to using GenericDataset dataclass for holding retrieved data (#140)
Browse files Browse the repository at this point in the history

* Create Generic Dataset (#124)

* hook up synthetic retriever to the new factory (#125)

* Remove dataset-format-transformer since it is no longer necessary (#127)

* Update type from Dict to add details (#129)

* Add multi-file support in retriever (#128)

* Add client-side batching for synthetic (#133)

* Add remaining converters, clean up retriever factory (#135)

* Move configuration checks to converters (#136)

* Add multi-file support for synthetic (#137)

* Support client-side batching for legacy completions endpoint (#138)

* Allow image retrieval endpoint to use synthetic data (#139)

* Update unit tests after GenericDataset redesign (#142)

---------

Co-authored-by: Elias Bermudez <[email protected]>
  • Loading branch information
dyastremsky and debermudez authored Oct 19, 2024
1 parent fb4da44 commit d134ae8
Show file tree
Hide file tree
Showing 38 changed files with 1,696 additions and 1,742 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package-genai.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
fail-fast: false
matrix:
os: ["ubuntu-22.04"]
python-version: ["3.8", "3.10"]
python-version: ["3.10"]

steps:
- uses: actions/checkout@v3
Expand Down
16 changes: 10 additions & 6 deletions genai-perf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ genai-perf --help
Since GenAI-Perf depends on Perf Analyzer,
you'll need to install the Perf Analyzer binary:

### Install Perf Analyzer (Ubuntu, Python 3.8+)
### Install Perf Analyzer (Ubuntu, Python 3.10+)

**NOTE**: you must already have CUDA 12 installed
(checkout the [CUDA installation guide](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html)).
Expand Down Expand Up @@ -282,8 +282,8 @@ When the dataset is synthetic, you can specify the following options:

When the dataset is coming from a file, you can specify the following
options:
* `--input-file <path>`: The input file containing the prompts to
use for benchmarking as JSON objects.
* `--input-file <path>`: The input file or directory containing the prompts or
filepaths to images to use for benchmarking as JSON objects.

For any dataset, you can specify the following options:
* `--output-tokens-mean <int>`: The mean number of tokens in each output. Ensure
Expand Down Expand Up @@ -420,9 +420,13 @@ Alternatively, a string representing a json formatted dict can be provided.

##### `--input-file <path>`

The input file containing the prompts to use for profiling.
Each line should be a JSON object with a 'text' field in JSONL format.
Example: {"text": "Your prompt here"}
The input file or directory containing the content to use for
profiling. To use synthetic files for a converter that needs
multiple files, prefix the path with 'synthetic:', followed by a
comma-separated list of filenames. The synthetic filenames should not have
extensions. For example, 'synthetic:queries,passages'.
Each line should be a JSON object with a 'text' or 'image' field
in JSONL format. Example: {"text": "Your prompt here"}

##### `--num-prompts <int>`

Expand Down
51 changes: 17 additions & 34 deletions genai-perf/genai_perf/inputs/converters/base_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,35 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import random
from typing import Dict, List, Union, cast
from typing import Any, Dict

from genai_perf.exceptions import GenAIPerfException
from genai_perf.inputs.input_constants import ModelSelectionStrategy
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import GenericDataset


class BaseConverter:
"""
Base class for all converters that take generic JSON payloads
and convert them to endpoint-specific payloads.
"""

_CONTENT_NAMES: List[str]
def check_config(self, config: InputsConfig) -> None:
"""
Check whether the provided configuration is valid for this converter.
Throws a GenAIPerfException if the configuration is invalid.
"""
pass

def convert(self, generic_dataset: Dict, config: InputsConfig) -> Dict:
def convert(
self, generic_dataset: GenericDataset, config: InputsConfig
) -> Dict[Any, Any]:
"""
Construct a request body using the endpoint specific request format.
"""
raise NotImplementedError
raise NotImplementedError("This method should be implemented by subclasses.")

def _select_model_name(self, config: InputsConfig, index: int) -> str:
if config.model_selection_strategy == ModelSelectionStrategy.ROUND_ROBIN:
Expand All @@ -51,33 +64,3 @@ def _select_model_name(self, config: InputsConfig, index: int) -> str:
raise GenAIPerfException(
f"Model selection strategy '{config.model_selection_strategy}' is unsupported"
)

def _construct_text_payload_batch_agnostic(
self, batch_size_text: int, input_data: Union[Dict, List]
) -> Union[str, List]:
"""
Construct text payload content for non-chat based LLM converters.
Allow batched and unbatched input data.
"""
if batch_size_text == 1:
input_data = cast(Dict, input_data)
return self._construct_text_payload(input_data)
else:
input_data = cast(List, input_data)
return self._construct_batched_text_payload(input_data)

def _construct_text_payload(self, input_data: Dict) -> str:
"""
Construct text payload content for non-chat based LLM converters.
Since there are no roles or turns in non-chat LLM endpoints, all the
(pre-defined) text contents are concatenated into a single text prompt.
"""
contents = [v for k, v in input_data.items() if k in self._CONTENT_NAMES]
return " ".join(contents)

def _construct_batched_text_payload(self, input_data: List) -> List:
"""
Construct batched text payload content for non-chat based LLM converters.
"""
contents = [item["text"] for item in input_data]
return contents
Original file line number Diff line number Diff line change
Expand Up @@ -25,60 +25,103 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import random
from typing import Any, Dict, List
from typing import Any, Dict, List, Union

from genai_perf.exceptions import GenAIPerfException
from genai_perf.inputs.converters.base_converter import BaseConverter
from genai_perf.inputs.input_constants import DEFAULT_OUTPUT_TOKENS_MEAN, OutputFormat
from genai_perf.inputs.input_constants import (
DEFAULT_BATCH_SIZE,
DEFAULT_OUTPUT_TOKENS_MEAN,
OutputFormat,
)
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import DataRow, GenericDataset


class OpenAIChatCompletionsConverter(BaseConverter):

def convert(self, generic_dataset: Dict, config: InputsConfig) -> Dict:
def check_config(self, config: InputsConfig) -> None:
if config.output_format == OutputFormat.IMAGE_RETRIEVAL:
if config.add_stream:
raise GenAIPerfException(
f"The --streaming option is not supported for {config.output_format.to_lowercase()}."
)
elif (
config.output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS
or config.output_format == OutputFormat.OPENAI_VISION
):
if config.batch_size_text != DEFAULT_BATCH_SIZE:
raise GenAIPerfException(
f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}."
)
if config.batch_size_image != DEFAULT_BATCH_SIZE:
raise GenAIPerfException(
f"The --batch-size-image flag is not supported for {config.output_format.to_lowercase()}."
)

def convert(
self, generic_dataset: GenericDataset, config: InputsConfig
) -> Dict[Any, Any]:
request_body: Dict[str, Any] = {"data": []}

for index, entry in enumerate(generic_dataset["rows"]):
model_name = self._select_model_name(config, index)
for file_data in generic_dataset.files_data.values():
for index, row in enumerate(file_data.rows):
payload = self._create_payload(index, row, config)
request_body["data"].append({"payload": [payload]})

return request_body

content: Any = []
if config.output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS:
content = entry["text"]
else:
# Treat single batch and multi-batch entries the same way
entries = entry if isinstance(entry, list) else [entry]
for _entry in entries:
content += self._add_multi_modal_content(_entry)
def _create_payload(
self, index: int, row: DataRow, config: InputsConfig
) -> Dict[Any, Any]:
model_name = self._select_model_name(config, index)
content = self._retrieve_content(row, config)

payload = {
"model": model_name,
"messages": [
{
"role": "user",
"content": content,
}
],
}
payload = {
"model": model_name,
"messages": [
{
"role": "user",
"content": content,
}
],
}

self._add_request_params(payload, config)
request_body["data"].append({"payload": [payload]})
self._add_request_params(payload, config)
return payload

return request_body
def _retrieve_content(
self, row: DataRow, config: InputsConfig
) -> Union[str, List[Dict[Any, Any]]]:
content: Union[str, List[Dict[Any, Any]]] = ""
if config.output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS:
content = row.texts[0]
elif (
config.output_format == OutputFormat.OPENAI_VISION
or config.output_format == OutputFormat.IMAGE_RETRIEVAL
):
content = self._add_multi_modal_content(row)
else:
raise GenAIPerfException(
f"Output format {config.output_format} is not supported"
)
return content

def _add_multi_modal_content(self, entry: Dict) -> List[Dict]:
content = []
if "text" in entry:
def _add_multi_modal_content(self, entry: DataRow) -> List[Dict[Any, Any]]:
content: List[Dict[Any, Any]] = []
for text in entry.texts:
content.append(
{
"type": "text",
"text": entry["text"],
"text": text,
}
)
if "image" in entry:
for image in entry.images:
content.append(
{
"type": "image_url",
"image_url": {
"url": entry["image"],
"url": image,
},
}
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,36 +30,27 @@
from genai_perf.inputs.converters.base_converter import BaseConverter
from genai_perf.inputs.input_constants import DEFAULT_OUTPUT_TOKENS_MEAN
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import GenericDataset


class OpenAICompletionsConverter(BaseConverter):

# TODO (TPA-430): This works for great for synthetic and input file approaches
# but a bit tedious for dataset case as we need to specify the content names
# for each dataset. This is because a dataset can be used differently depending
# on the endpoint (e.g. chat vs non-chat).
_CONTENT_NAMES = [
"text",
# OPENORCA
"system_prompt",
"question",
# CNN DAILYMAIL
"article",
]

def convert(self, generic_dataset: Dict, config: InputsConfig) -> Dict:
def convert(
self, generic_dataset: GenericDataset, config: InputsConfig
) -> Dict[Any, Any]:
request_body: Dict[str, Any] = {"data": []}

for index, entry in enumerate(generic_dataset["rows"]):
model_name = self._select_model_name(config, index)
prompt = self._construct_text_payload(entry)
for file_data in generic_dataset.files_data.values():
for index, row in enumerate(file_data.rows):
model_name = self._select_model_name(config, index)
prompt = row.texts

payload = {
"model": model_name,
"prompt": prompt,
}
self._add_request_params(payload, config)
request_body["data"].append({"payload": [payload]})
payload = {
"model": model_name,
"prompt": prompt,
}
self._add_request_params(payload, config)
request_body["data"].append({"payload": [payload]})

return request_body

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,31 +26,35 @@

from typing import Any, Dict

from genai_perf.exceptions import GenAIPerfException
from genai_perf.inputs.converters.base_converter import BaseConverter
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import GenericDataset


class OpenAIEmbeddingsConverter(BaseConverter):

_CONTENT_NAMES = [
"text",
]
def check_config(self, config: InputsConfig) -> None:
if config.add_stream:
raise GenAIPerfException(
f"The --streaming option is not supported for {config.output_format.to_lowercase()}."
)

def convert(self, generic_dataset: Dict, config: InputsConfig) -> Dict:
def convert(
self, generic_dataset: GenericDataset, config: InputsConfig
) -> Dict[Any, Any]:
request_body: Dict[str, Any] = {"data": []}

for index, entry in enumerate(generic_dataset["rows"]):
text = self._construct_text_payload_batch_agnostic(
config.batch_size_text, entry
)
model_name = self._select_model_name(config, index)

payload = {
"model": model_name,
"input": text,
}
self._add_request_params(payload, config)
request_body["data"].append({"payload": [payload]})
for file_data in generic_dataset.files_data.values():
for index, row in enumerate(file_data.rows):
model_name = self._select_model_name(config, index)

payload = {
"model": model_name,
"input": row.texts,
}
self._add_request_params(payload, config)
request_body["data"].append({"payload": [payload]})

return request_body

Expand Down
Loading

0 comments on commit d134ae8

Please sign in to comment.