Run pre-commit hooks after inputs redesign to use GenericDataset #143

Merged · 2 commits · Oct 19, 2024
2 changes: 1 addition & 1 deletion .github/workflows/python-package-genai.yml
@@ -39,7 +39,7 @@ jobs:
      fail-fast: false
Contributor:
In the future, could we put all formatting-related changes in their own commit or even better in their own PR?

@dyastremsky (Contributor Author) · Oct 19, 2024:
What do you mean? Usually, pre-commit is turned on, so the formatting happens with every commit. Since it was turned off as part of this development work, re-enabling it immediately ran all the hooks. It then flagged type errors and other static-analysis issues, which I fixed manually; it wouldn't let me commit without fixing everything.

I could have let the auto-formatting run, uninstalled pre-commit, pushed a commit with just the auto-formatting, re-installed pre-commit, and then made any manual changes in a separate commit, if that's in line with what you're asking for. That sounds like a good strategy if this happens again.

P.S. I agree this is suboptimal and I'd prefer to find ways to avoid disabling pre-commit and creating these kinds of PRs in the future.

@matthewkotila (Contributor) · Oct 19, 2024:

I'm not sure how to achieve it, but I mean literally what I said. I had to read everything much more closely because I couldn't tell whether a given change was formatting-related or some other kind of change (e.g. using a Python 3.10 feature or something).

It would be much easier to review if a single commit contained everything related to auto-formatting: run pre-commit without any feature-related changes for the first commit, then put the non-formatting changes (like adding a trailing comma or whatever) in later commits. That way, as the reviewer, I can skip the auto-format commit entirely, which saves mental load; I trust whatever auto-formatter we're using, and we have testing to make sure auto-formatting doesn't accidentally break anything.

@dyastremsky (Contributor Author) · Oct 19, 2024:

Totally fair. I originally had it as a separate commit and will do so next time.

Just a heads up that I didn't make the aforementioned change; the trailing comma was actually pre-commit formatting. The only things I did were fix the errors pre-commit returned, then fix the 2-3 CLI unit tests that broke when the behavior was fixed. I think you're right, though: I can run just the auto-formatting and save it as its own commit so that you don't need to review that commit.

Thanks again for reviewing so much with such quick turnaround, Matt!

      matrix:
        os: ["ubuntu-22.04"]
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.10"]

    steps:
    - uses: actions/checkout@v3
6 changes: 3 additions & 3 deletions genai-perf/README.md
@@ -89,7 +89,7 @@ genai-perf --help
Since GenAI-Perf depends on Perf Analyzer,
you'll need to install the Perf Analyzer binary:

-### Install Perf Analyzer (Ubuntu, Python 3.8+)
+### Install Perf Analyzer (Ubuntu, Python 3.10+)

**NOTE**: you must already have CUDA 12 installed
(checkout the [CUDA installation guide](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html)).
@@ -282,7 +282,7 @@ When the dataset is synthetic, you can specify the following options:

When the dataset is coming from a file, you can specify the following
options:
-* `--input-file <path>`: The input file or directory containing the prompts or 
+* `--input-file <path>`: The input file or directory containing the prompts or
filepaths to images to use for benchmarking as JSON objects.

For any dataset, you can specify the following options:
@@ -420,7 +420,7 @@ Alternatively, a string representing a json formatted dict can be provided.

##### `--input-file <path>`

-The input file or directory containing the content to use for 
+The input file or directory containing the content to use for
profiling. To use synthetic files for a converter that needs
multiple files, prefix the path with 'synthetic:', followed by a
comma-separated list of filenames. The synthetic filenames should not have
6 changes: 4 additions & 2 deletions genai-perf/genai_perf/inputs/converters/base_converter.py
@@ -42,12 +42,14 @@ class BaseConverter:
    def check_config(self, config: InputsConfig) -> None:
        """
        Check whether the provided configuration is valid for this converter.
+        Throws a GenAIPerfException if the configuration is invalid.
        """
        pass

-    def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
+    def convert(
+        self, generic_dataset: GenericDataset, config: InputsConfig
+    ) -> Dict[Any, Any]:
        """
        Construct a request body using the endpoint specific request format.
        """
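For orientation, here is a minimal sketch of what a `BaseConverter` subclass looks like against this interface. It is illustrative only: `MyConverter` and the `text_input` payload field are hypothetical, while `_select_model_name` and `_add_request_params` are helpers that appear in the real converters below.

```python
from typing import Any, Dict

from genai_perf.exceptions import GenAIPerfException
from genai_perf.inputs.converters.base_converter import BaseConverter
from genai_perf.inputs.input_constants import DEFAULT_BATCH_SIZE
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import GenericDataset


class MyConverter(BaseConverter):
    """Hypothetical converter, shown only to illustrate the interface."""

    def check_config(self, config: InputsConfig) -> None:
        # Reject unsupported flags up front, mirroring the converters in this PR.
        if config.batch_size_text != DEFAULT_BATCH_SIZE:
            raise GenAIPerfException(
                f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}."
            )

    def convert(
        self, generic_dataset: GenericDataset, config: InputsConfig
    ) -> Dict[Any, Any]:
        # Build one payload per dataset row, as the converters below do.
        request_body: Dict[str, Any] = {"data": []}
        for file_data in generic_dataset.files_data.values():
            for index, row in enumerate(file_data.rows):
                model_name = self._select_model_name(config, index)
                # "text_input" is an assumed field name for this sketch.
                payload = {"model": model_name, "text_input": row.texts[0]}
                self._add_request_params(payload, config)
                request_body["data"].append({"payload": [payload]})
        return request_body
```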
genai-perf/genai_perf/inputs/converters/openai_chat_completions_converter.py
@@ -27,27 +27,41 @@
import random
from typing import Any, Dict, List, Union

+from genai_perf.exceptions import GenAIPerfException
from genai_perf.inputs.converters.base_converter import BaseConverter
-from genai_perf.inputs.input_constants import DEFAULT_OUTPUT_TOKENS_MEAN, OutputFormat
+from genai_perf.inputs.input_constants import (
+    DEFAULT_BATCH_SIZE,
+    DEFAULT_OUTPUT_TOKENS_MEAN,
+    OutputFormat,
+)
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import DataRow, GenericDataset
-from genai_perf.exceptions import GenAIPerfException
-from genai_perf.inputs.input_constants import DEFAULT_BATCH_SIZE


class OpenAIChatCompletionsConverter(BaseConverter):

    def check_config(self, config: InputsConfig) -> None:
        if config.output_format == OutputFormat.IMAGE_RETRIEVAL:
            if config.add_stream:
-                raise GenAIPerfException(f"The --streaming option is not supported for {config.output_format.to_lowercase()}.")
-        elif config.output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS or config.output_format == OutputFormat.OPENAI_VISION:
+                raise GenAIPerfException(
+                    f"The --streaming option is not supported for {config.output_format.to_lowercase()}."
+                )
+        elif (
+            config.output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS
+            or config.output_format == OutputFormat.OPENAI_VISION
+        ):
            if config.batch_size_text != DEFAULT_BATCH_SIZE:
-                raise GenAIPerfException(f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}.")
+                raise GenAIPerfException(
+                    f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}."
+                )
            if config.batch_size_image != DEFAULT_BATCH_SIZE:
-                raise GenAIPerfException(f"The --batch-size-image flag is not supported for {config.output_format.to_lowercase()}.")
+                raise GenAIPerfException(
+                    f"The --batch-size-image flag is not supported for {config.output_format.to_lowercase()}."
+                )

-
-    def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
+    def convert(
+        self, generic_dataset: GenericDataset, config: InputsConfig
+    ) -> Dict[Any, Any]:
        request_body: Dict[str, Any] = {"data": []}

        for file_data in generic_dataset.files_data.values():
@@ -57,7 +71,9 @@

        return request_body

-    def _create_payload(self, index: int, row: DataRow, config: InputsConfig) -> Dict[Any, Any]:
+    def _create_payload(
+        self, index: int, row: DataRow, config: InputsConfig
+    ) -> Dict[Any, Any]:
        model_name = self._select_model_name(config, index)
        content = self._retrieve_content(row, config)

@@ -74,18 +90,25 @@
        self._add_request_params(payload, config)
        return payload

-    def _retrieve_content(self, row: DataRow, config: InputsConfig) -> Union[str, List[Dict[Any, Any]]]:
+    def _retrieve_content(
+        self, row: DataRow, config: InputsConfig
+    ) -> Union[str, List[Dict[Any, Any]]]:
        content: Union[str, List[Dict[Any, Any]]] = ""
        if config.output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS:
            content = row.texts[0]
-        elif config.output_format == OutputFormat.OPENAI_VISION or config.output_format == OutputFormat.IMAGE_RETRIEVAL:
+        elif (
+            config.output_format == OutputFormat.OPENAI_VISION
+            or config.output_format == OutputFormat.IMAGE_RETRIEVAL
+        ):
            content = self._add_multi_modal_content(row)
        else:
-            raise GenAIPerfException(f"Output format {config.output_format} is not supported")
+            raise GenAIPerfException(
+                f"Output format {config.output_format} is not supported"
+            )
        return content

    def _add_multi_modal_content(self, entry: DataRow) -> List[Dict[Any, Any]]:
-        content = []
+        content: List[Dict[Any, Any]] = []
        for text in entry.texts:
            content.append(
                {
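The body of `_add_multi_modal_content` is truncated in the diff above. For reference, here is a sketch of the content shape it presumably builds, based on the public OpenAI chat-completions multimodal format rather than on code visible here:

```python
# Assumed shape, based on the public OpenAI chat-completions API;
# the literal values are placeholders, not taken from this PR.
content = [
    {"type": "text", "text": "Describe this image."},
    {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
]
```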
genai-perf/genai_perf/inputs/converters/openai_completions_converter.py
@@ -32,9 +32,12 @@
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import GenericDataset

+
class OpenAICompletionsConverter(BaseConverter):

-    def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
+    def convert(
+        self, generic_dataset: GenericDataset, config: InputsConfig
+    ) -> Dict[Any, Any]:
        request_body: Dict[str, Any] = {"data": []}

        for file_data in generic_dataset.files_data.values():
genai-perf/genai_perf/inputs/converters/openai_embeddings_converter.py
@@ -26,18 +26,23 @@

from typing import Any, Dict

+from genai_perf.exceptions import GenAIPerfException
from genai_perf.inputs.converters.base_converter import BaseConverter
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
-from genai_perf.exceptions import GenAIPerfException


class OpenAIEmbeddingsConverter(BaseConverter):

    def check_config(self, config: InputsConfig) -> None:
        if config.add_stream:
-            raise GenAIPerfException(f"The --streaming option is not supported for {config.output_format.to_lowercase()}.")
+            raise GenAIPerfException(
+                f"The --streaming option is not supported for {config.output_format.to_lowercase()}."
+            )

-    def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
+    def convert(
+        self, generic_dataset: GenericDataset, config: InputsConfig
+    ) -> Dict[Any, Any]:
        request_body: Dict[str, Any] = {"data": []}

        for file_data in generic_dataset.files_data.values():
25 changes: 14 additions & 11 deletions genai-perf/genai_perf/inputs/converters/rankings_converter.py
@@ -24,26 +24,31 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-from typing import Any, Dict
+from typing import Any, Dict, List, Union

+from genai_perf.exceptions import GenAIPerfException
from genai_perf.inputs.converters.base_converter import BaseConverter
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
-from genai_perf.exceptions import GenAIPerfException


class RankingsConverter(BaseConverter):

    def check_config(self, config: InputsConfig) -> None:
        if config.add_stream:
-            raise GenAIPerfException(f"The --streaming option is not supported for {config.output_format.to_lowercase()}.")
+            raise GenAIPerfException(
+                f"The --streaming option is not supported for {config.output_format.to_lowercase()}."
+            )

-    def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
+    def convert(
+        self, generic_dataset: GenericDataset, config: InputsConfig
+    ) -> Dict[Any, Any]:
        provided_filenames = list(generic_dataset.files_data.keys())
        if "queries" not in provided_filenames or "passages" not in provided_filenames:
            raise ValueError(
                "Both 'queries.jsonl' and 'passages.jsonl' must be present in the input datasets."
            )

        queries_data = generic_dataset.files_data["queries"]
        passages_data = generic_dataset.files_data["passages"]

@@ -60,27 +65,25 @@

            passage_entry = passages_data.rows[query_index]

+            passages: Union[List[str], List[Dict[str, str]]]
+            payload: Dict[str, Any]
+
            if self._is_rankings_tei(config):
                passages = passage_entry.texts
                payload = {"query": query, "texts": passages}
            else:
                passages = [
-                    {"text_input": p}
-                    for p in passage_entry.texts
-                    if p is not None
+                    {"text_input": p} for p in passage_entry.texts if p is not None
                ]
                payload = {
                    "query": query,
                    "passages": passages,
                    "model": model_name,
                }

-
            self._add_request_params(payload, config)
            request_body["data"].append({"payload": [payload]})

-
        return request_body

    def _is_rankings_tei(self, config: InputsConfig) -> bool:
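To make the two branches in `convert` concrete, here are the two payload shapes it produces, written out as literals with placeholder values:

```python
# TEI-style payload, produced when _is_rankings_tei(config) is true.
tei_payload = {"query": "example query", "texts": ["passage one", "passage two"]}

# Default payload otherwise; the model name comes from _select_model_name.
default_payload = {
    "query": "example query",
    "passages": [{"text_input": "passage one"}, {"text_input": "passage two"}],
    "model": "example_model",
}
```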
17 changes: 11 additions & 6 deletions genai-perf/genai_perf/inputs/converters/tensorrtllm_converter.py
@@ -27,25 +27,30 @@
import random
from typing import Any, Dict

+from genai_perf.exceptions import GenAIPerfException
from genai_perf.inputs.converters.base_converter import BaseConverter
from genai_perf.inputs.input_constants import (
+    DEFAULT_BATCH_SIZE,
    DEFAULT_OUTPUT_TOKENS_MEAN,
    DEFAULT_TENSORRTLLM_MAX_TOKENS,
)
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
-from genai_perf.exceptions import GenAIPerfException
-from genai_perf.inputs.input_constants import DEFAULT_BATCH_SIZE


class TensorRTLLMConverter(BaseConverter):

    def check_config(self, config: InputsConfig) -> None:
        if config.batch_size_text != DEFAULT_BATCH_SIZE:
-            raise GenAIPerfException(f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}.")
+            raise GenAIPerfException(
+                f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}."
+            )

-    def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
+    def convert(
+        self, generic_dataset: GenericDataset, config: InputsConfig
+    ) -> Dict[Any, Any]:
        request_body: Dict[str, Any] = {"data": []}

        for file_data in generic_dataset.files_data.values():
            for index, row in enumerate(file_data.rows):
                model_name = self._select_model_name(config, index)
genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
@@ -27,22 +27,27 @@
import random
from typing import Any, Dict

+from genai_perf.exceptions import GenAIPerfException
from genai_perf.inputs.converters.base_converter import BaseConverter
from genai_perf.inputs.input_constants import (
+    DEFAULT_BATCH_SIZE,
    DEFAULT_OUTPUT_TOKENS_MEAN,
    DEFAULT_TENSORRTLLM_MAX_TOKENS,
)
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
-from genai_perf.exceptions import GenAIPerfException
-from genai_perf.inputs.input_constants import DEFAULT_BATCH_SIZE


class TensorRTLLMEngineConverter(BaseConverter):
    def check_config(self, config: InputsConfig) -> None:
        if config.batch_size_text != DEFAULT_BATCH_SIZE:
-            raise GenAIPerfException(f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}.")
+            raise GenAIPerfException(
+                f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}."
+            )

-    def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
+    def convert(
+        self, generic_dataset: GenericDataset, config: InputsConfig
+    ) -> Dict[Any, Any]:
        request_body: Dict[str, Any] = {"data": []}

        for file_data in generic_dataset.files_data.values():
19 changes: 13 additions & 6 deletions genai-perf/genai_perf/inputs/converters/vllm_converter.py
@@ -28,20 +28,27 @@
import random
from typing import Any, Dict

+from genai_perf.exceptions import GenAIPerfException
from genai_perf.inputs.converters.base_converter import BaseConverter
-from genai_perf.inputs.input_constants import DEFAULT_OUTPUT_TOKENS_MEAN
+from genai_perf.inputs.input_constants import (
+    DEFAULT_BATCH_SIZE,
+    DEFAULT_OUTPUT_TOKENS_MEAN,
+)
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
-from genai_perf.exceptions import GenAIPerfException
-from genai_perf.inputs.input_constants import DEFAULT_BATCH_SIZE


class VLLMConverter(BaseConverter):

    def check_config(self, config: InputsConfig) -> None:
        if config.batch_size_text != DEFAULT_BATCH_SIZE:
-            raise GenAIPerfException(f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}.")
+            raise GenAIPerfException(
+                f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}."
+            )

-    def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
+    def convert(
+        self, generic_dataset: GenericDataset, config: InputsConfig
+    ) -> Dict[Any, Any]:
        request_body: Dict[str, Any] = {"data": []}

        for file_data in generic_dataset.files_data.values():