Move endpoint-specific argument validation out of parser.py / Inputs and into
a per-converter check_config() hook that Inputs.__init__ calls right after
creating the converter.

Review notes (text before the first "diff --git" line is ignored by patch tools):
  * Error messages now call to_lowercase() instead of formatting the bound method.
  * RankingsConverter compares input_type against PromptSource.FILE (was the
    string "file") and rejects --batch-size-image rather than --batch-size-text,
    matching the parser messages this patch deletes. TODO confirm with the author.
  * Dropped a blank-line-for-blank-line change in tensorrtllm_converter.py.
  * NOTE(review): line structure and indentation were reconstructed from hunk
    counts and Python syntax. The removed lines in the second
    openai_embeddings_converter.py hunk are assumed to sit at 12 spaces - verify
    against the target tree before applying. "index" hashes were not regenerated.

diff --git a/genai-perf/genai_perf/inputs/converters/base_converter.py b/genai-perf/genai_perf/inputs/converters/base_converter.py
index 416d986b..c9f29915 100644
--- a/genai-perf/genai_perf/inputs/converters/base_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/base_converter.py
@@ -39,6 +39,14 @@ class BaseConverter:
     and convert them to endpoint-specific payloads.
     """
 
+    def check_config(self, config: InputsConfig) -> None:
+        """
+        Check whether the provided configuration is valid for this converter.
+
+        Throws a GenAIPerfException if the configuration is invalid.
+        """
+        pass
+
     def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
         """
         Construct a request body using the endpoint specific request format.
diff --git a/genai-perf/genai_perf/inputs/converters/openai_chat_completions_converter.py b/genai-perf/genai_perf/inputs/converters/openai_chat_completions_converter.py
index bb5e3367..6b47c718 100644
--- a/genai-perf/genai_perf/inputs/converters/openai_chat_completions_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/openai_chat_completions_converter.py
@@ -28,13 +28,31 @@
 from typing import Any, Dict, List
 
 from genai_perf.inputs.converters.base_converter import BaseConverter
-from genai_perf.inputs.input_constants import DEFAULT_OUTPUT_TOKENS_MEAN, OutputFormat
+from genai_perf.inputs.input_constants import DEFAULT_OUTPUT_TOKENS_MEAN, OutputFormat, PromptSource
 from genai_perf.inputs.inputs_config import InputsConfig
 from genai_perf.inputs.retrievers.generic_dataset import DataRow, GenericDataset
-
+from genai_perf.exceptions import GenAIPerfException
+from genai_perf.inputs.input_constants import DEFAULT_BATCH_SIZE
 
 class OpenAIChatCompletionsConverter(BaseConverter):
 
+    def check_config(self, config: InputsConfig) -> None:
+        if config.output_format == OutputFormat.IMAGE_RETRIEVAL:
+            if config.add_stream:
+                raise GenAIPerfException(f"The --streaming option is not supported for {config.output_format.to_lowercase()}.")
+            # TODO: Confirm that this is required. This may work with synthetic now.
+            if config.input_type != PromptSource.FILE:
+                raise GenAIPerfException(
+                    f"{config.output_format.to_lowercase()} only supports "
+                    "a file as input source."
+                )
+        else:
+            if config.batch_size_image != DEFAULT_BATCH_SIZE:
+                raise GenAIPerfException(f"The --batch-size-image flag is not supported for {config.output_format.to_lowercase()}.")
+            if config.batch_size_text != DEFAULT_BATCH_SIZE:
+                raise GenAIPerfException(f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}.")
+
+
     def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
         request_body: Dict[str, Any] = {"data": []}
 
diff --git a/genai-perf/genai_perf/inputs/converters/openai_completions_converter.py b/genai-perf/genai_perf/inputs/converters/openai_completions_converter.py
index 2d07a1f0..bdae737e 100644
--- a/genai-perf/genai_perf/inputs/converters/openai_completions_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/openai_completions_converter.py
@@ -31,10 +31,17 @@
 from genai_perf.inputs.input_constants import DEFAULT_OUTPUT_TOKENS_MEAN
 from genai_perf.inputs.inputs_config import InputsConfig
 from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
-
+from genai_perf.exceptions import GenAIPerfException
+from genai_perf.inputs.input_constants import DEFAULT_BATCH_SIZE
 
 class OpenAICompletionsConverter(BaseConverter):
 
+    def check_config(self, config: InputsConfig) -> None:
+        if config.batch_size_image != DEFAULT_BATCH_SIZE:
+            raise GenAIPerfException(f"The --batch-size-image flag is not supported for {config.output_format.to_lowercase()}.")
+        if config.batch_size_text != DEFAULT_BATCH_SIZE:
+            raise GenAIPerfException(f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}.")
+
     def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
         request_body: Dict[str, Any] = {"data": []}
 
diff --git a/genai-perf/genai_perf/inputs/converters/openai_embeddings_converter.py b/genai-perf/genai_perf/inputs/converters/openai_embeddings_converter.py
index 5f15cfe6..ddbe4eab 100644
--- a/genai-perf/genai_perf/inputs/converters/openai_embeddings_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/openai_embeddings_converter.py
@@ -29,10 +29,17 @@
 from genai_perf.inputs.converters.base_converter import BaseConverter
 from genai_perf.inputs.inputs_config import InputsConfig
 from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
-
+from genai_perf.exceptions import GenAIPerfException
+from genai_perf.inputs.input_constants import DEFAULT_BATCH_SIZE
 
 class OpenAIEmbeddingsConverter(BaseConverter):
 
+    def check_config(self, config: InputsConfig) -> None:
+        if config.add_stream:
+            raise GenAIPerfException(f"The --streaming option is not supported for {config.output_format.to_lowercase()}.")
+        if config.batch_size_image != DEFAULT_BATCH_SIZE:
+            raise GenAIPerfException(f"The --batch-size-image flag is not supported for {config.output_format.to_lowercase()}.")
+
     def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
         request_body: Dict[str, Any] = {"data": []}
 
@@ -44,8 +51,8 @@ def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict
                     "model": model_name,
                     "input": row.texts,
                 }
-            self._add_request_params(payload, config)
-            request_body["data"].append({"payload": [payload]})
+                self._add_request_params(payload, config)
+                request_body["data"].append({"payload": [payload]})
 
         return request_body
 
diff --git a/genai-perf/genai_perf/inputs/converters/rankings_converter.py b/genai-perf/genai_perf/inputs/converters/rankings_converter.py
index b9637ee4..241b93e2 100644
--- a/genai-perf/genai_perf/inputs/converters/rankings_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/rankings_converter.py
@@ -29,10 +29,22 @@
 from genai_perf.inputs.converters.base_converter import BaseConverter
 from genai_perf.inputs.inputs_config import InputsConfig
 from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
-
+from genai_perf.exceptions import GenAIPerfException
+from genai_perf.inputs.input_constants import DEFAULT_BATCH_SIZE, PromptSource
 
 class RankingsConverter(BaseConverter):
 
+    def check_config(self, config: InputsConfig) -> None:
+        if config.add_stream:
+            raise GenAIPerfException(f"The --streaming option is not supported for {config.output_format.to_lowercase()}.")
+        if config.batch_size_image != DEFAULT_BATCH_SIZE:
+            raise GenAIPerfException(f"The --batch-size-image flag is not supported for {config.output_format.to_lowercase()}.")
+        if config.input_type != PromptSource.FILE:
+            raise GenAIPerfException(
+                f"{config.output_format.to_lowercase()} only supports "
+                "a file as input source."
+            )
+
     def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
         provided_filenames = list(generic_dataset.files_data.keys())
         if "queries" not in provided_filenames or "passages" not in provided_filenames:
diff --git a/genai-perf/genai_perf/inputs/converters/tensorrtllm_converter.py b/genai-perf/genai_perf/inputs/converters/tensorrtllm_converter.py
index 549b42cd..8922a0ac 100644
--- a/genai-perf/genai_perf/inputs/converters/tensorrtllm_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/tensorrtllm_converter.py
@@ -34,13 +34,20 @@
 )
 from genai_perf.inputs.inputs_config import InputsConfig
 from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
-
+from genai_perf.exceptions import GenAIPerfException
+from genai_perf.inputs.input_constants import DEFAULT_BATCH_SIZE
 
 class TensorRTLLMConverter(BaseConverter):
 
+    def check_config(self, config: InputsConfig) -> None:
+        if config.batch_size_image != DEFAULT_BATCH_SIZE:
+            raise GenAIPerfException(f"The --batch-size-image flag is not supported for {config.output_format.to_lowercase()}.")
+        if config.batch_size_text != DEFAULT_BATCH_SIZE:
+            raise GenAIPerfException(f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}.")
+
     def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
         request_body: Dict[str, Any] = {"data": []}
 
         for file_data in generic_dataset.files_data.values():
             for index, row in enumerate(file_data.rows):
                 model_name = self._select_model_name(config, index)
diff --git a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
index dbea21ec..7bd0793e 100644
--- a/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/tensorrtllm_engine_converter.py
@@ -34,10 +34,16 @@
 )
 from genai_perf.inputs.inputs_config import InputsConfig
 from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
-
+from genai_perf.exceptions import GenAIPerfException
+from genai_perf.inputs.input_constants import DEFAULT_BATCH_SIZE
 
 class TensorRTLLMEngineConverter(BaseConverter):
-
+    def check_config(self, config: InputsConfig) -> None:
+        if config.batch_size_image != DEFAULT_BATCH_SIZE:
+            raise GenAIPerfException(f"The --batch-size-image flag is not supported for {config.output_format.to_lowercase()}.")
+        if config.batch_size_text != DEFAULT_BATCH_SIZE:
+            raise GenAIPerfException(f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}.")
+
     def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
         request_body: Dict[str, Any] = {"data": []}
 
diff --git a/genai-perf/genai_perf/inputs/converters/vllm_converter.py b/genai-perf/genai_perf/inputs/converters/vllm_converter.py
index b0055ef9..9b00b183 100644
--- a/genai-perf/genai_perf/inputs/converters/vllm_converter.py
+++ b/genai-perf/genai_perf/inputs/converters/vllm_converter.py
@@ -32,10 +32,17 @@
 from genai_perf.inputs.input_constants import DEFAULT_OUTPUT_TOKENS_MEAN
 from genai_perf.inputs.inputs_config import InputsConfig
 from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
-
+from genai_perf.exceptions import GenAIPerfException
+from genai_perf.inputs.input_constants import DEFAULT_BATCH_SIZE
 
 class VLLMConverter(BaseConverter):
 
+    def check_config(self, config: InputsConfig) -> None:
+        if config.batch_size_image != DEFAULT_BATCH_SIZE:
+            raise GenAIPerfException(f"The --batch-size-image flag is not supported for {config.output_format.to_lowercase()}.")
+        if config.batch_size_text != DEFAULT_BATCH_SIZE:
+            raise GenAIPerfException(f"The --batch-size-text flag is not supported for {config.output_format.to_lowercase()}.")
+
     def convert(self, generic_dataset: GenericDataset, config: InputsConfig) -> Dict[Any, Any]:
         request_body: Dict[str, Any] = {"data": []}
 
diff --git a/genai-perf/genai_perf/inputs/inputs.py b/genai-perf/genai_perf/inputs/inputs.py
index 08ada2e5..72c6b15e 100644
--- a/genai-perf/genai_perf/inputs/inputs.py
+++ b/genai-perf/genai_perf/inputs/inputs.py
@@ -40,8 +40,8 @@ class Inputs:
 
     def __init__(self, config: InputsConfig):
         self.config = config
-        if self.config.extra_inputs is None:
-            self.config.extra_inputs = {}
+        self.converter = OutputFormatConverterFactory.create(self.config.output_format)
+        self.converter.check_config(self.config)
 
         random.seed(self.config.random_seed)
 
@@ -60,31 +60,18 @@ def create_inputs(self) -> None:
         self._write_json_to_file(json_in_pa_format)
 
     def _check_for_valid_args(self) -> None:
-        self._check_for_supported_input_type()
         self._check_for_tokenzier_if_input_type_is_synthetic()
         self._check_for_valid_starting_index()
         self._check_for_valid_length()
 
     def _convert_generic_dataset_to_output_format(self, generic_dataset) -> Dict:
-        converter = OutputFormatConverterFactory.create(self.config.output_format)
-        return converter.convert(generic_dataset, self.config)
+        return self.converter.convert(generic_dataset, self.config)
 
     def _write_json_to_file(self, json_in_pa_format: Dict) -> None:
         filename = self.config.output_dir / DEFAULT_INPUT_DATA_JSON
         with open(str(filename), "w") as f:
             f.write(json.dumps(json_in_pa_format, indent=2))
 
-    def _check_for_supported_input_type(self) -> None:
-        if self.config.output_format in [
-            OutputFormat.RANKINGS,
-            OutputFormat.IMAGE_RETRIEVAL,
-        ]:
-            if self.config.input_type != PromptSource.FILE:
-                raise GenAIPerfException(
-                    f"{self.config.output_format.to_lowercase()} only supports "
-                    "a file as input source."
-                )
-
     def _check_for_tokenzier_if_input_type_is_synthetic(self) -> None:
         if (
             self.config.input_type == PromptSource.SYNTHETIC
diff --git a/genai-perf/genai_perf/inputs/inputs_config.py b/genai-perf/genai_perf/inputs/inputs_config.py
index e20a147c..ff038f25 100644
--- a/genai-perf/genai_perf/inputs/inputs_config.py
+++ b/genai-perf/genai_perf/inputs/inputs_config.py
@@ -62,10 +62,10 @@ class InputsConfig:
     # If true, adds a steam field to each payload
     add_stream: bool = False
 
-    # The number of image inputs per request (currently only used for the image retrieval endpoint)
+    # The number of image inputs per request
     batch_size_image: int = 1
 
-    # The number of text inputs per request (currently only used for the embeddings and rankings endpoints)
+    # The number of text inputs per request
     batch_size_text: int = 1
 
     # If provided, append these inputs to every request
diff --git a/genai-perf/genai_perf/inputs/retrievers/base_input_retriever.py b/genai-perf/genai_perf/inputs/retrievers/base_input_retriever.py
index 059eb43d..77f53ecd 100644
--- a/genai-perf/genai_perf/inputs/retrievers/base_input_retriever.py
+++ b/genai-perf/genai_perf/inputs/retrievers/base_input_retriever.py
@@ -37,4 +37,4 @@ def retrieve_data(self) -> GenericDataset:
         """
         Method to retrieve data as a GenericDataset.
         """
-        raise NotImplementedError("This method should be implemented by subclasses.")
\ No newline at end of file
+        raise NotImplementedError("This method should be implemented by subclasses.")
diff --git a/genai-perf/genai_perf/inputs/retrievers/file_input_retriever.py b/genai-perf/genai_perf/inputs/retrievers/file_input_retriever.py
index e2c17a49..2c5e98c6 100644
--- a/genai-perf/genai_perf/inputs/retrievers/file_input_retriever.py
+++ b/genai-perf/genai_perf/inputs/retrievers/file_input_retriever.py
@@ -197,4 +197,4 @@ def _encode_image(self, filename: str) -> str:
         img_base64 = utils.encode_image(img, img.format)
 
         payload = f"data:image/{img.format.lower()};base64,{img_base64}"
-        return payload
\ No newline at end of file
+        return payload
diff --git a/genai-perf/genai_perf/inputs/retrievers/input_retriever_factory.py b/genai-perf/genai_perf/inputs/retrievers/input_retriever_factory.py
index af2bbf15..2dcab58a 100644
--- a/genai-perf/genai_perf/inputs/retrievers/input_retriever_factory.py
+++ b/genai-perf/genai_perf/inputs/retrievers/input_retriever_factory.py
@@ -48,4 +48,4 @@ def create(config: InputsConfig) -> BaseInputRetriever:
         if input_type not in retrievers:
             raise GenAIPerfException(f"Input source '{input_type}' is not recognized.")
         retriever_class = retrievers[input_type]()
-        return retriever_class(config)
\ No newline at end of file
+        return retriever_class(config)
diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py
index 0ef51fb4..eaee443b 100644
--- a/genai-perf/genai_perf/parser.py
+++ b/genai-perf/genai_perf/parser.py
@@ -135,6 +135,8 @@ def _check_conditional_args(
     """
 
     # Endpoint and output format checks
+    # TODO: Replace this with a more robust solution.
+    # Currently, a new endpoint would need to add support here.
     if args.service_kind == "openai":
         if args.endpoint_type is None:
             parser.error(
@@ -191,53 +193,17 @@ def _check_conditional_args(
                 "with the Triton and TensorRT-LLM Engine service-kind."
             )
 
-    _check_conditional_args_embeddings_rankings(parser, args)
-
-    return args
-
-
-def _check_conditional_args_embeddings_rankings(
-    parser: argparse.ArgumentParser, args: argparse.Namespace
-):
-
     if args.output_format in [
         ic.OutputFormat.OPENAI_EMBEDDINGS,
         ic.OutputFormat.RANKINGS,
         ic.OutputFormat.IMAGE_RETRIEVAL,
     ]:
-        if args.streaming:
-            parser.error(
-                f"The --streaming option is not supported with the {args.endpoint_type} endpoint type."
-            )
-
         if args.generate_plots:
             parser.error(
                 f"The --generate-plots option is not currently supported with the {args.endpoint_type} endpoint type."
             )
-    else:
-        if args.batch_size_text != ic.DEFAULT_BATCH_SIZE:
-            parser.error(
-                "The --batch-size-text option is currently only supported "
-                "with the embeddings and rankings endpoint types."
-            )
-        if args.batch_size_image != ic.DEFAULT_BATCH_SIZE:
-            parser.error(
-                "The --batch-size-image option is currently only supported "
-                "with the image retrieval endpoint type."
-            )
 
-    if args.input_file:
-        _, path_type = args.input_file
-        if args.output_format != ic.OutputFormat.RANKINGS:
-            if path_type == "directory":
-                parser.error(
-                    "A directory is only currently supported for the rankings endpoint type."
-                )
-        else:
-            if path_type == PathType.FILE:
-                parser.error(
-                    "The rankings endpoint-type requires a directory value for the --input-file flag."
-                )
+    return args
 
 
 def _check_load_manager_args(args: argparse.Namespace) -> argparse.Namespace:
@@ -380,15 +346,9 @@ def parse_goodput(values):
 def _infer_prompt_source(args: argparse.Namespace) -> argparse.Namespace:
     if args.input_file:
         args.prompt_source = ic.PromptSource.FILE
-        if args.endpoint_type == "rankings":
-            logger.debug(
-                f"Input source is the following directory: {args.input_file[0]}"
-            )
-        else:
-            logger.debug(f"Input source is the following file: {args.input_file[0]}")
+        logger.debug(f"Input source is the following path: {args.input_file[0]}")
     else:
         args.prompt_source = ic.PromptSource.SYNTHETIC
-        logger.debug("Input source is synthetic data")
     return args
 
 