New config options based on live run (#775)

* Added new config options and modified existing options

* Refactoring model parameter setting

* Removing magic numbers
nv-braf authored Oct 17, 2023
1 parent 14ea528 commit 6e0fc24
Showing 8 changed files with 257 additions and 225 deletions.
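A pattern that recurs throughout the hunks below: when run config search is disabled, the fallback minimums had been hard-coded, and this commit replaces those magic numbers with named defaults imported from config_defaults. For example, in _create_concurrency_list:

    # Before: magic number as the fallback when search is disabled
    return [1]
    # After: named default from model_analyzer/config/input/config_defaults.py
    return [DEFAULT_RUN_CONFIG_MIN_CONCURRENCY]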
2 changes: 2 additions & 0 deletions model_analyzer/config/generate/generator_utils.py
@@ -108,6 +108,8 @@ def generate_doubled_list(min_value: int, max_value: int) -> List[int]:
             The value that the generated list will not exceed
         """
 
+        assert min_value <= max_value
+
         list = []
         val = 1 if min_value == 0 else min_value
         while val <= max_value:
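The hunk above shows only the head of generate_doubled_list; the loop body lies outside the diff context. A minimal standalone sketch of the whole helper, assuming the elided loop doubles val on each iteration (as the function name and the new assertion suggest):

    from typing import List

    def generate_doubled_list(min_value: int, max_value: int) -> List[int]:
        # Guard added by this commit: reject inverted ranges up front.
        assert min_value <= max_value
        result = []  # the original uses the name `list`, which shadows the builtin
        val = 1 if min_value == 0 else min_value
        while val <= max_value:
            result.append(val)
            val *= 2  # assumed doubling step, elided from the hunk
        return result

    # generate_doubled_list(0, 16)  -> [1, 2, 4, 8, 16]
    # generate_doubled_list(4, 100) -> [4, 8, 16, 32, 64]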
36 changes: 28 additions & 8 deletions model_analyzer/config/generate/perf_analyzer_config_generator.py
@@ -20,7 +20,14 @@
 from typing import Dict, Generator, List, Optional, Tuple
 
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
-from model_analyzer.config.input.config_defaults import DEFAULT_INPUT_JSON_PATH
+from model_analyzer.config.input.config_defaults import (
+    DEFAULT_INPUT_JSON_PATH,
+    DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
+    DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT,
+    DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY,
+    DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
+    DEFAULT_RUN_CONFIG_MIN_TEXT_INPUT_LENGTH,
+)
 from model_analyzer.constants import (
     LOGGER_NAME,
     THROUGHPUT_MINIMUM_CONSECUTIVE_INFERENCE_LOAD_TRIES,
@@ -211,7 +218,9 @@ def _create_inference_load_list(self) -> List[int]:
         # The two possible inference loads are request rate or concurrency
         # Concurrency is the default and will be used unless the user specifies
         # request rate, either as a model parameter or a config option
-        if self._cli_config.is_request_rate_specified(self._model_parameters):
+        if self._cli_config.is_llm_model():
+            return self._create_periodic_concurrency_list()
+        elif self._cli_config.is_request_rate_specified(self._model_parameters):
             return self._create_request_rate_list()
         else:
             return self._create_concurrency_list()
@@ -220,7 +229,7 @@ def _create_request_rate_list(self) -> List[int]:
         if self._model_parameters["request_rate"]:
             return sorted(self._model_parameters["request_rate"])
         elif self._cli_config.run_config_search_disable:
-            return [1]
+            return [DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE]
         else:
             return utils.generate_doubled_list(
                 self._cli_config.run_config_search_min_request_rate,
@@ -231,21 +240,32 @@ def _create_concurrency_list(self) -> List[int]:
         if self._model_parameters["concurrency"]:
             return sorted(self._model_parameters["concurrency"])
         elif self._cli_config.run_config_search_disable:
-            return [1]
+            return [DEFAULT_RUN_CONFIG_MIN_CONCURRENCY]
         else:
             return utils.generate_doubled_list(
                 self._cli_config.run_config_search_min_concurrency,
                 self._cli_config.run_config_search_max_concurrency,
             )
 
+    def _create_periodic_concurrency_list(self) -> List[int]:
+        if self._model_parameters["periodic_concurrency"]:
+            return sorted(self._model_parameters["periodic_concurrency"])
+        elif self._cli_config.run_config_search_disable:
+            return [DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY]
+        else:
+            return utils.generate_doubled_list(
+                self._cli_config.run_config_search_min_periodic_concurrency,
+                self._cli_config.run_config_search_max_periodic_concurrency,
+            )
+
     def _create_text_input_length_list(self) -> List[int]:
         if not self._cli_config.is_llm_model():
             return []
 
         if self._model_parameters["text_input_length"]:
             return sorted(self._model_parameters["text_input_length"])
         elif self._cli_config.run_config_search_disable:
-            return [1]
+            return [DEFAULT_RUN_CONFIG_MIN_TEXT_INPUT_LENGTH]
         else:
             return utils.generate_doubled_list(
                 self._cli_config.run_config_search_min_text_input_length,
@@ -259,11 +279,11 @@ def _create_max_token_count_list(self) -> List[int]:
         if self._model_parameters["max_token_count"]:
             return sorted(self._model_parameters["max_token_count"])
         elif self._cli_config.run_config_search_disable:
-            return [1]
+            return [DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT]
         else:
             return utils.generate_doubled_list(
-                self._cli_config.run_config_search_min_token_count,
-                self._cli_config.run_config_search_max_token_count,
+                self._cli_config.run_config_search_min_max_token_count,
+                self._cli_config.run_config_search_max_max_token_count,
             )
 
     def _generate_perf_configs(self) -> None:
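Every _create_*_list method above follows the same three-branch shape: explicit per-model values win, a single named minimum is returned when search is disabled, and otherwise the search space is a doubled sweep between the configured min and max. For LLM models, _create_inference_load_list now routes to the new periodic-concurrency list before the request-rate and concurrency branches. A condensed sketch of the shared shape (the helper name and signature are illustrative, not part of the commit):

    from typing import List

    def build_search_list(
        explicit_values: List[int],  # values the user listed as a model parameter
        search_disabled: bool,       # --run-config-search-disable
        search_min: int,
        search_max: int,
        default_min: int,            # e.g. DEFAULT_RUN_CONFIG_MIN_CONCURRENCY
    ) -> List[int]:
        if explicit_values:
            return sorted(explicit_values)
        elif search_disabled:
            return [default_min]
        else:
            # doubled sweep, e.g. 1, 2, 4, ... up to search_max
            return generate_doubled_list(search_min, search_max)  # sketched earlier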
192 changes: 135 additions & 57 deletions model_analyzer/config/input/config_command_profile.py
@@ -17,6 +17,7 @@
 import argparse
 import logging
 import os
+from typing import Dict
 
 import numba.cuda
 import psutil
@@ -497,7 +498,9 @@ def _add_profile_models_configs(self):
                 schema={
                     "batch_sizes": ConfigListNumeric(type_=int),
                     "concurrency": ConfigListNumeric(type_=int),
+                    "periodic_concurrency": ConfigListNumeric(type_=int),
                     "request_rate": ConfigListNumeric(type_=int),
+                    "request_period": ConfigListNumeric(type_=int),
                     "text_input_length": ConfigListNumeric(type_=int),
                     "max_token_count": ConfigListNumeric(type_=int),
                 }
@@ -562,6 +565,15 @@ def _add_profile_models_configs(self):
                 " to be used during profiling",
             )
         )
+        self._add_config(
+            ConfigField(
+                "periodic_concurrency",
+                flags=["--periodic-concurrency"],
+                field_type=ConfigListNumeric(int),
+                description="Comma-delimited list of periodic concurrency values or ranges <start:end:step>"
+                " to be used during profiling",
+            )
+        )
         self._add_config(
             ConfigField(
                 "request_rate",
@@ -571,6 +583,15 @@ def _add_profile_models_configs(self):
                 " to be used during profiling",
             )
         )
+        self._add_config(
+            ConfigField(
+                "request_period",
+                flags=["--request-period"],
+                field_type=ConfigListNumeric(int),
+                description="Comma-delimited list of request period values or ranges <start:end:step>"
+                " to be used during profiling",
+            )
+        )
         self._add_config(
             ConfigField(
                 "text_input_length",
@@ -687,7 +708,7 @@ def _add_run_search_configs(self):
                 flags=["--run-config-search-max-concurrency"],
                 field_type=ConfigPrimitive(int),
                 default_value=config_defaults.DEFAULT_RUN_CONFIG_MAX_CONCURRENCY,
-                description="Max concurrency value that run config search should not go beyond that.",
+                description="Max concurrency value that run config search should not go beyond.",
             )
         )
         self._add_config(
@@ -699,13 +720,49 @@ def _add_run_search_configs(self):
                 description="Min concurrency value that run config search should start with.",
             )
         )
+        self._add_config(
+            ConfigField(
+                "run_config_search_max_periodic_concurrency",
+                flags=["--run-config-search-max-periodic-concurrency"],
+                field_type=ConfigPrimitive(int),
+                default_value=config_defaults.DEFAULT_RUN_CONFIG_MAX_PERIODIC_CONCURRENCY,
+                description="Max periodic concurrency value that run config search should not go beyond.",
+            )
+        )
+        self._add_config(
+            ConfigField(
+                "run_config_search_min_periodic_concurrency",
+                flags=["--run-config-search-min-periodic-concurrency"],
+                field_type=ConfigPrimitive(int),
+                default_value=config_defaults.DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY,
+                description="Min periodic concurrency value that run config search should start with.",
+            )
+        )
+        self._add_config(
+            ConfigField(
+                "run_config_search_max_periodic_concurrency_step",
+                flags=["--run-config-search-max-periodic-concurrency-step"],
+                field_type=ConfigPrimitive(int),
+                default_value=config_defaults.DEFAULT_RUN_CONFIG_MAX_PERIODIC_CONCURRENCY_STEP,
+                description="Max periodic concurrency step value that run config search should not go beyond.",
+            )
+        )
+        self._add_config(
+            ConfigField(
+                "run_config_search_min_periodic_concurrency_step",
+                flags=["--run-config-search-min-periodic-concurrency-step"],
+                field_type=ConfigPrimitive(int),
+                default_value=config_defaults.DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY_STEP,
+                description="Min periodic concurrency step value that run config search should start with.",
+            )
+        )
         self._add_config(
             ConfigField(
                 "run_config_search_max_request_rate",
                 flags=["--run-config-search-max-request-rate"],
                 field_type=ConfigPrimitive(int),
                 default_value=config_defaults.DEFAULT_RUN_CONFIG_MAX_REQUEST_RATE,
-                description="Max request rate value that run config search should not go beyond that.",
+                description="Max request rate value that run config search should not go beyond.",
             )
         )
         self._add_config(
@@ -717,13 +774,31 @@ def _add_run_search_configs(self):
                 description="Min request rate value that run config search should start with.",
             )
         )
+        self._add_config(
+            ConfigField(
+                "run_config_search_max_request_period",
+                flags=["--run-config-search-max-request-period"],
+                field_type=ConfigPrimitive(int),
+                default_value=config_defaults.DEFAULT_RUN_CONFIG_MAX_REQUEST_PERIOD,
+                description="Max request period value that run config search should not go beyond.",
+            )
+        )
+        self._add_config(
+            ConfigField(
+                "run_config_search_min_request_period",
+                flags=["--run-config-search-min-request-period"],
+                field_type=ConfigPrimitive(int),
+                default_value=config_defaults.DEFAULT_RUN_CONFIG_MIN_REQUEST_PERIOD,
+                description="Min request period value that run config search should start with.",
+            )
+        )
         self._add_config(
             ConfigField(
                 "run_config_search_max_instance_count",
                 flags=["--run-config-search-max-instance-count"],
                 field_type=ConfigPrimitive(int),
                 default_value=config_defaults.DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT,
-                description="Max instance count value that run config search should not go beyond that.",
+                description="Max instance count value that run config search should not go beyond.",
             )
         )
         self._add_config(
@@ -836,20 +911,20 @@ def _add_run_search_configs(self):
         )
         self._add_config(
             ConfigField(
-                "run_config_search_min_token_count",
-                flags=["--run-config-search-min-token-count"],
+                "run_config_search_min_max_token_count",
+                flags=["--run-config-search-min-max-token-count"],
                 field_type=ConfigPrimitive(int),
-                default_value=config_defaults.DEFAULT_RUN_CONFIG_MIN_TOKEN_COUNT,
-                description="Min token count that run config search should start with.",
+                default_value=config_defaults.DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT,
+                description="Min max_token count that run config search should start with.",
             )
         )
         self._add_config(
             ConfigField(
-                "run_config_search_max_token_count",
-                flags=["--run-config-search-max-token-count"],
+                "run_config_search_max_max_token_count",
+                flags=["--run-config-search-max-max-token-count"],
                 field_type=ConfigPrimitive(int),
-                default_value=config_defaults.DEFAULT_RUN_CONFIG_MAX_TOKEN_COUNT,
-                description="Max token count that run config search will not go beyond.",
+                default_value=config_defaults.DEFAULT_RUN_CONFIG_MAX_MAX_TOKEN_COUNT,
+                description="Max max_token count that run config search will not go beyond.",
             )
         )
 
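The token-count hunk above is a rename rather than an addition: the flags gain a second "max" to make explicit that they bound the max_token_count model parameter rather than a generic token count:

    --run-config-search-min-token-count  ->  --run-config-search-min-max-token-count
    --run-config-search-max-token-count  ->  --run-config-search-max-max-token-count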
@@ -1420,50 +1495,35 @@ def _autofill_values(self):
                 new_model["parameters"] = {
                     "batch_sizes": self.batch_sizes,
                     "concurrency": self.concurrency,
+                    "periodic_concurrency": self.periodic_concurrency,
                     "request_rate": self.request_rate,
+                    "request_period": self.request_period,
                     "text_input_length": self.text_input_length,
                     "max_token_count": self.max_token_count,
                 }
             else:
                 new_model["parameters"] = {}
-                if "batch_sizes" in model.parameters():
-                    new_model["parameters"].update(
-                        {"batch_sizes": model.parameters()["batch_sizes"]}
-                    )
-                else:
-                    new_model["parameters"].update({"batch_sizes": self.batch_sizes})
-
-                if "concurrency" in model.parameters():
-                    new_model["parameters"].update(
-                        {"concurrency": model.parameters()["concurrency"]}
-                    )
-                else:
-                    new_model["parameters"].update({"concurrency": self.concurrency})
-
-                if "request_rate" in model.parameters():
-                    new_model["parameters"].update(
-                        {"request_rate": model.parameters()["request_rate"]}
-                    )
-                else:
-                    new_model["parameters"].update({"request_rate": self.request_rate})
-
-                if "text_input_length" in model.parameters():
-                    new_model["parameters"].update(
-                        {"text_input_length": model.parameters()["text_input_length"]}
-                    )
-                else:
-                    new_model["parameters"].update(
-                        {"text_input_length": self.text_input_length}
-                    )
-
-                if "max_token_count" in model.parameters():
-                    new_model["max_token_count"].update(
-                        {"max_token_count": model.parameters()["max_token_count"]}
-                    )
-                else:
-                    new_model["parameters"].update(
-                        {"max_token_count": self.text_input_length}
-                    )
+                new_model["parameters"].update(
+                    self._set_model_parameter(model, "batch_sizes")
+                )
+                new_model["parameters"].update(
+                    self._set_model_parameter(model, "concurrency")
+                )
+                new_model["parameters"].update(
+                    self._set_model_parameter(model, "periodic_concurrency")
+                )
+                new_model["parameters"].update(
+                    self._set_model_parameter(model, "request_rate")
+                )
+                new_model["parameters"].update(
+                    self._set_model_parameter(model, "request_period")
+                )
+                new_model["parameters"].update(
+                    self._set_model_parameter(model, "max_token_count")
+                )
+                new_model["parameters"].update(
+                    self._set_model_parameter(model, "text_input_length")
+                )
 
             if (
                 new_model["parameters"]["request_rate"]
@@ -1506,6 +1566,14 @@ def _autofill_values(self):
             new_profile_models[model.model_name()] = new_model
         self._fields["profile_models"].set_value(new_profile_models)
 
+    def _set_model_parameter(
+        self, model: ConfigModelProfileSpec, parameter_name: str
+    ) -> Dict:
+        if parameter_name in model.parameters():
+            return {parameter_name: model.parameters()[parameter_name]}
+        else:
+            return {parameter_name: getattr(self, parameter_name)}
+
     def _using_request_rate(self) -> bool:
         if self.request_rate or self.request_rate_search_enable:
             return True
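The new _set_model_parameter helper replaces the five copy-pasted if/else blocks removed above, and in doing so fixes two bugs visible in the removed code: the max_token_count branch updated new_model["max_token_count"] instead of new_model["parameters"], and its fallback value was self.text_input_length instead of self.max_token_count. The seven call sites could be collapsed further; a sketch (the loop is illustrative, not part of the commit):

    # Per-model values win; global CLI/config values fill the gaps.
    new_model["parameters"] = {}
    for name in (
        "batch_sizes",
        "concurrency",
        "periodic_concurrency",
        "request_rate",
        "request_period",
        "max_token_count",
        "text_input_length",
    ):
        new_model["parameters"].update(self._set_model_parameter(model, name))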
@@ -1550,16 +1618,26 @@ def is_llm_model(self) -> bool:
         """
         Returns true if the user has enabled llm search or set any llm search value
         """
+        config = self.get_config()
+
         return (
             self.llm_search_enable
-            or self.get_config()[
-                "run_config_search_min_text_input_length"
+            or config["run_config_search_min_text_input_length"].is_set_by_user()
+            or config["run_config_search_max_text_input_length"].is_set_by_user()
+            or config["run_config_search_min_max_token_count"].is_set_by_user()
+            or config["run_config_search_max_max_token_count"].is_set_by_user()
+            or config["run_config_search_min_periodic_concurrency"].is_set_by_user()
+            or config["run_config_search_max_periodic_concurrency"].is_set_by_user()
+            or config[
+                "run_config_search_min_periodic_concurrency_step"
             ].is_set_by_user()
-            or self.get_config()[
-                "run_config_search_max_text_input_length"
+            or config[
+                "run_config_search_max_periodic_concurrency_step"
             ].is_set_by_user()
-            or self.get_config()["run_config_search_min_token_count"].is_set_by_user()
-            or self.get_config()["run_config_search_max_token_count"].is_set_by_user()
-            or self.get_config()["text_input_length"].is_set_by_user()
-            or self.get_config()["max_token_count"].is_set_by_user()
+            or config["run_config_search_min_request_period"].is_set_by_user()
+            or config["run_config_search_max_request_period"].is_set_by_user()
+            or config["text_input_length"].is_set_by_user()
+            or config["max_token_count"].is_set_by_user()
+            or config["periodic_concurrency"].is_set_by_user()
+            or config["request_period"].is_set_by_user()
         )
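is_llm_model now caches get_config() in a local variable and treats any of the new periodic-concurrency, request-period, text-input-length, or max-token-count options as opting the run into LLM search. Since the method has grown into a long chain of is_set_by_user() checks, an equivalent data-driven form (a hypothetical rewrite, not part of the commit) may be easier to extend:

    _LLM_TRIGGER_FIELDS = (
        "run_config_search_min_text_input_length",
        "run_config_search_max_text_input_length",
        "run_config_search_min_max_token_count",
        "run_config_search_max_max_token_count",
        "run_config_search_min_periodic_concurrency",
        "run_config_search_max_periodic_concurrency",
        "run_config_search_min_periodic_concurrency_step",
        "run_config_search_max_periodic_concurrency_step",
        "run_config_search_min_request_period",
        "run_config_search_max_request_period",
        "text_input_length",
        "max_token_count",
        "periodic_concurrency",
        "request_period",
    )

    def is_llm_model(self) -> bool:
        config = self.get_config()
        return self.llm_search_enable or any(
            config[name].is_set_by_user() for name in _LLM_TRIGGER_FIELDS
        )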