diff --git a/model_analyzer/config/generate/base_model_config_generator.py b/model_analyzer/config/generate/base_model_config_generator.py
index 475295eb0..fcdb7739c 100755
--- a/model_analyzer/config/generate/base_model_config_generator.py
+++ b/model_analyzer/config/generate/base_model_config_generator.py
@@ -70,6 +70,7 @@ def __init__(
self._base_model = model
self._base_model_name = model.model_name()
self._remote_mode = config.triton_launch_mode == "remote"
+ self._c_api_mode = config.triton_launch_mode == "c_api"
self._cpu_only = model.cpu_only()
self._default_only = default_only
self._early_exit_enable = early_exit_enable
@@ -154,7 +155,7 @@ def _get_last_results_max_throughput(self) -> Optional[float]:
def _make_remote_model_config_variant(self) -> ModelConfigVariant:
if not self._config.reload_model_disable:
- self._client.load_model(self._base_model_name)
+ self._client.load_model(model_name=self._base_model_name)
model_config = ModelConfig.create_from_triton_api(
self._client, self._base_model_name, self._config.client_max_retries
)
@@ -171,6 +172,7 @@ def _make_direct_mode_model_config_variant(
param_combo=param_combo,
model=self._base_model,
model_variant_name_manager=self._model_variant_name_manager,
+ c_api_mode=self._c_api_mode,
)
@staticmethod
@@ -178,6 +180,7 @@ def make_model_config_variant(
param_combo: dict,
model: ModelProfileSpec,
model_variant_name_manager: ModelVariantNameManager,
+ c_api_mode: bool,
) -> ModelConfigVariant:
"""
Loads the base model config from the model repository, and then applies the
@@ -189,6 +192,7 @@ def make_model_config_variant(
dict of key:value pairs to apply to the model config
model: ModelProfileSpec
model_variant_name_manager: ModelVariantNameManager
+ c_api_mode: Set to true if mode is c_api
"""
logger_str: List[str] = []
model_name = model.model_name()
@@ -211,7 +215,7 @@ def make_model_config_variant(
logger.info(str)
logger.info("")
- model_config_dict["name"] = variant_name
+ model_config_dict["name"] = variant_name if c_api_mode else model_name
model_config = ModelConfig.create_from_dictionary(model_config_dict)
model_config.set_cpu_only(model.cpu_only())
@@ -222,6 +226,7 @@ def make_ensemble_model_config_variant(
model: ModelProfileSpec,
ensemble_composing_model_config_variants: List[ModelConfigVariant],
model_variant_name_manager: ModelVariantNameManager,
+ c_api_mode: bool,
param_combo: Dict = {},
) -> ModelConfigVariant:
"""
@@ -235,6 +240,7 @@ def make_ensemble_model_config_variant(
ensemble_composing_model_config_variants: List of ModelConfigVariants
The list of composing model ModelConfigs
model_variant_name_manager: ModelVariantNameManager
+ c_api_mode: Set to true if mode is c_api
"""
logger_str: List[str] = []
@@ -243,12 +249,8 @@ def make_ensemble_model_config_variant(
model, param_combo, logger_str
)
- ensemble_config_dicts = [
- composing_model_config_variant.model_config.to_dict()
- for composing_model_config_variant in ensemble_composing_model_config_variants
- ]
ensemble_key = ModelVariantNameManager.make_ensemble_composing_model_key(
- ensemble_config_dicts
+ ensemble_composing_model_config_variants
)
(
@@ -265,7 +267,7 @@ def make_ensemble_model_config_variant(
for str in logger_str:
logger.info(str)
- model_config_dict["name"] = variant_name
+ model_config_dict["name"] = variant_name if c_api_mode else model_name
model_config = ModelConfig.create_from_dictionary(model_config_dict)
return ModelConfigVariant(model_config, variant_name)
diff --git a/model_analyzer/config/generate/model_run_config_generator.py b/model_analyzer/config/generate/model_run_config_generator.py
index 32f45d934..b068c7577 100755
--- a/model_analyzer/config/generate/model_run_config_generator.py
+++ b/model_analyzer/config/generate/model_run_config_generator.py
@@ -100,7 +100,7 @@ def get_configs(self) -> Generator[ModelRunConfig, None, None]:
for model_config_variant in self._mcg.get_configs():
self._pacg = PerfAnalyzerConfigGenerator(
self._config,
- model_config_variant.variant_name,
+ model_config_variant.model_config.get_field("name"),
self._model_pa_flags,
self._model_parameters,
self._pacg_early_exit_enable,
diff --git a/model_analyzer/config/generate/model_variant_name_manager.py b/model_analyzer/config/generate/model_variant_name_manager.py
index 118864a3f..2cdecb6d2 100755
--- a/model_analyzer/config/generate/model_variant_name_manager.py
+++ b/model_analyzer/config/generate/model_variant_name_manager.py
@@ -18,6 +18,7 @@
from typing import Dict, List, Tuple
from model_analyzer.constants import DEFAULT_CONFIG_PARAMS
+from model_analyzer.triton.model.model_config_variant import ModelConfigVariant
class ModelVariantNameManager:
@@ -45,12 +46,9 @@ def from_dict(
@staticmethod
def make_ensemble_composing_model_key(
- ensemble_config_dicts: List[Dict],
+ ensemble_model_config_variants: List[ModelConfigVariant],
) -> Dict[str, str]:
- ensemble_names = [
- ensemble_config_dict["name"]
- for ensemble_config_dict in ensemble_config_dicts
- ]
+ ensemble_names = [emcv.variant_name for emcv in ensemble_model_config_variants]
ensemble_key = ",".join(ensemble_names)
return {"key": ensemble_key}
diff --git a/model_analyzer/config/generate/quick_run_config_generator.py b/model_analyzer/config/generate/quick_run_config_generator.py
index 795036b39..5454765bf 100755
--- a/model_analyzer/config/generate/quick_run_config_generator.py
+++ b/model_analyzer/config/generate/quick_run_config_generator.py
@@ -91,6 +91,8 @@ def __init__(
self._triton_env = BruteRunConfigGenerator.determine_triton_server_env(models)
+ self._c_api_mode = config.triton_launch_mode == "c_api"
+
# This tracks measured results for all coordinates
self._coordinate_data = CoordinateData()
@@ -425,6 +427,7 @@ def _get_next_ensemble_model_config_variant(
ensemble_composing_model_config_variants=composing_config_variants,
model_variant_name_manager=self._model_variant_name_manager,
param_combo=param_combo,
+ c_api_mode=self._c_api_mode,
)
)
@@ -471,6 +474,7 @@ def _get_next_model_config_variant(
param_combo=param_combo,
model=model,
model_variant_name_manager=self._model_variant_name_manager,
+ c_api_mode=self._c_api_mode,
)
return model_config_variant
@@ -483,7 +487,7 @@ def _create_next_model_run_config(
composing_model_config_variants: List[ModelConfigVariant],
) -> ModelRunConfig:
perf_analyzer_config = self._get_next_perf_analyzer_config(
- model_config_variant.variant_name, model, model_index
+ model.model_name(), model, model_index
)
model_run_config = ModelRunConfig(
model.model_name(), model_config_variant, perf_analyzer_config
@@ -624,6 +628,7 @@ def _create_default_ensemble_model_run_config(
model=model,
ensemble_composing_model_config_variants=default_composing_model_config_variants,
model_variant_name_manager=self._model_variant_name_manager,
+ c_api_mode=self._c_api_mode,
)
default_perf_analyzer_config = self._create_default_perf_analyzer_config(
@@ -652,6 +657,7 @@ def _create_default_composing_model_config_variants(
param_combo={},
model=composing_model,
model_variant_name_manager=self._model_variant_name_manager,
+ c_api_mode=self._c_api_mode,
)
)
@@ -665,6 +671,7 @@ def _create_default_model_run_config(
param_combo={},
model=model,
model_variant_name_manager=self._model_variant_name_manager,
+ c_api_mode=self._c_api_mode,
)
)
diff --git a/model_analyzer/config/run/model_run_config.py b/model_analyzer/config/run/model_run_config.py
index ebb50dc1c..738c709b1 100755
--- a/model_analyzer/config/run/model_run_config.py
+++ b/model_analyzer/config/run/model_run_config.py
@@ -145,7 +145,8 @@ def representation(self) -> str:
Returns a representation string for the ModelRunConfig that can be used
as a key to uniquely identify it
"""
- repr = self.perf_config().representation()
+ repr = self.model_variant_name()
+ repr += " " + self.perf_config().representation()
if self._composing_config_variants:
repr += " " + (",").join(self.get_composing_config_names()) # type: ignore
diff --git a/model_analyzer/perf_analyzer/perf_config.py b/model_analyzer/perf_analyzer/perf_config.py
index 292920086..e9160a44a 100755
--- a/model_analyzer/perf_analyzer/perf_config.py
+++ b/model_analyzer/perf_analyzer/perf_config.py
@@ -207,7 +207,7 @@ def update_config_from_profile_config(self, model_name, profile_config):
params.update({"protocol": profile_config.client_protocol, "url": url})
- metrics_interval = (
+ metrics_interval = int(
profile_config.monitoring_interval * SECONDS_TO_MILLISECONDS_MULTIPLIER
)
params.update(
diff --git a/model_analyzer/record/metrics_manager.py b/model_analyzer/record/metrics_manager.py
index f8c6e0891..176b632df 100755
--- a/model_analyzer/record/metrics_manager.py
+++ b/model_analyzer/record/metrics_manager.py
@@ -387,14 +387,9 @@ def _load_model_variants(self, run_config):
# Composing configs for BLS models are not automatically loaded by the top-level model
if mrc.is_bls_model():
- for composing_config_variant in mrc.composing_configs():
- original_composing_config = (
- BaseModelConfigGenerator.create_original_config_from_variant(
- composing_config_variant
- )
- )
+ for composing_config_variant in mrc.composing_config_variants():
if not self._load_model_variant(
- variant_config=original_composing_config
+ variant_config=composing_config_variant
):
return False
@@ -423,13 +418,23 @@ def _do_load_model_variant(self, variant_config):
log_file=self._server.log_file(),
)
+ model_name = variant_config.model_config.get_field("name")
variant_name = variant_config.variant_name
- if self._client.load_model(model_name=variant_name) == -1:
+ config_str = variant_config.model_config.get_config_str()
+ if (
+ self._client.load_model(
+ model_name=model_name,
+ variant_name=variant_name,
+ config_str=config_str,
+ )
+ == -1
+ ):
return False
if (
self._client.wait_for_model_ready(
- model_name=variant_name, num_retries=self._config.client_max_retries
+ model_name=variant_config.model_config.get_field("name"),
+ num_retries=self._config.client_max_retries,
)
== -1
):
@@ -710,16 +715,15 @@ def _get_triton_metrics_gpus(self):
return triton_gpus
def _print_run_config_info(self, run_config):
- for perf_config in [
- mrc.perf_config() for mrc in run_config.model_run_configs()
- ]:
+ for model_run_config in run_config.model_run_configs():
+ perf_config = model_run_config.perf_config()
if perf_config["request-rate-range"]:
logger.info(
- f"Profiling {perf_config['model-name']}: client batch size={perf_config['batch-size']}, request-rate-range={perf_config['request-rate-range']}"
+ f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, request-rate-range={perf_config['request-rate-range']}"
)
else:
logger.info(
- f"Profiling {perf_config['model-name']}: client batch size={perf_config['batch-size']}, concurrency={perf_config['concurrency-range']}"
+ f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, concurrency={perf_config['concurrency-range']}"
)
# Vertical spacing when running multiple models at a time
diff --git a/model_analyzer/reports/report_manager.py b/model_analyzer/reports/report_manager.py
index 0232bdbf9..c6cd521d8 100755
--- a/model_analyzer/reports/report_manager.py
+++ b/model_analyzer/reports/report_manager.py
@@ -642,7 +642,9 @@ def _create_model_summary_sentence(self, run_config: RunConfig) -> str:
summary_sentence = (
summary_sentence
+ "\n"
- + self._create_summary_config_info(model_run_config.model_config())
+ + self._create_summary_config_info(
+ model_run_config.model_config_variant()
+ )
+ " "
)
@@ -650,11 +652,13 @@ def _create_model_summary_sentence(self, run_config: RunConfig) -> str:
def _create_composing_model_summary_sentence(self, run_config: RunConfig) -> str:
summary_sentence = ""
- for composing_config in run_config.model_run_configs()[0].composing_configs():
+ for composing_config_variant in run_config.model_run_configs()[
+ 0
+ ].composing_config_variants():
summary_sentence = (
summary_sentence
+ " "
- + self._create_summary_config_info(composing_config)
+ + self._create_summary_config_info(composing_config_variant)
+ " "
)
@@ -1082,8 +1086,10 @@ def _create_non_gpu_metric_string(self, run_config_measurement, non_gpu_metric):
else:
return f"{non_gpu_metrics[0].value()}"
- def _create_summary_config_info(self, model_config):
- config_info = f"{model_config.get_field('name')}: "
+ def _create_summary_config_info(self, model_config_variant):
+ model_config = model_config_variant.model_config
+
+ config_info = f"{model_config_variant.variant_name}: "
config_info = (
config_info + f"{self._create_instance_group_phrase(model_config)} with a "
)
@@ -1235,11 +1241,11 @@ def _build_detailed_info(self, model_config_name):
if run_config.is_ensemble_model():
sentence = f"{model_config_name} is comprised of the following composing models:"
- for composing_config in run_config.composing_configs():
+ for composing_config_variant in run_config.composing_config_variants():
sentence = (
sentence
+ " "
- + self._create_summary_config_info(composing_config)
+ + self._create_summary_config_info(composing_config_variant)
+ " "
)
@@ -1250,11 +1256,11 @@ def _build_detailed_info(self, model_config_name):
elif run_config.is_bls_model():
sentence = f"{model_config_name} is comprised of the following composing models:"
- for composing_config in run_config.composing_configs():
+ for composing_config_variant in run_config.composing_config_variants():
sentence = (
sentence
+ " "
- + self._create_summary_config_info(composing_config)
+ + self._create_summary_config_info(composing_config_variant)
+ " "
)
diff --git a/model_analyzer/result/result_table_manager.py b/model_analyzer/result/result_table_manager.py
index a1890f9da..12a406e7c 100755
--- a/model_analyzer/result/result_table_manager.py
+++ b/model_analyzer/result/result_table_manager.py
@@ -388,10 +388,11 @@ def _tabulate_measurements(self, run_config_result):
def _tabulate_measurements_setup(self, run_config_result):
if run_config_result.run_config().is_ensemble_model():
+ model_config_variants = (
+ run_config_result.run_config().composing_config_variants()
+ )
model_configs = run_config_result.run_config().composing_configs()
- composing_config_names = [
- model_config.get_field("name") for model_config in model_configs
- ]
+ composing_config_names = [mcv.variant_name for mcv in model_config_variants]
else:
model_configs = [
model_run_configs.model_config()
diff --git a/model_analyzer/triton/client/client.py b/model_analyzer/triton/client/client.py
index a61470677..21ebeff15 100755
--- a/model_analyzer/triton/client/client.py
+++ b/model_analyzer/triton/client/client.py
@@ -74,7 +74,7 @@ def wait_for_server_ready(
"Could not determine server readiness. " "Number of retries exceeded."
)
- def load_model(self, model_name):
+ def load_model(self, model_name, variant_name="", config_str=None):
"""
Request the inference server to load
a particular model in explicit model
@@ -83,7 +83,13 @@ def load_model(self, model_name):
Parameters
----------
model_name : str
- name of the model to load from repository
+ Name of the model
+
+ variant_name: str
+ Name of the model variant
+
+ config_str: str
+ Optional config string used to load the model
Returns
------
@@ -91,12 +97,14 @@ def load_model(self, model_name):
Returns -1 if the failed.
"""
+ variant_name = variant_name if variant_name else model_name
+
try:
- self._client.load_model(model_name)
- logger.debug(f"Model {model_name} loaded")
+ self._client.load_model(model_name, config=config_str)
+ logger.debug(f"Model {variant_name} loaded")
return None
except Exception as e:
- logger.info(f"Model {model_name} load failed: {e}")
+ logger.info(f"Model {variant_name} load failed: {e}")
return -1
def unload_model(self, model_name):
diff --git a/model_analyzer/triton/model/model_config.py b/model_analyzer/triton/model/model_config.py
index f0933a9fa..6eb6cfd7e 100755
--- a/model_analyzer/triton/model/model_config.py
+++ b/model_analyzer/triton/model/model_config.py
@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import json
import os
from copy import deepcopy
from distutils.dir_util import copy_tree
@@ -150,7 +151,7 @@ def _get_default_config_from_server(config, client, gpus, model_name, model_path
num_retries=config.client_max_retries, log_file=server.log_file()
)
- if client.load_model(model_name) == -1:
+ if client.load_model(model_name=model_name) == -1:
server.stop()
if not os.path.exists(model_path):
@@ -438,6 +439,17 @@ def get_config(self):
self._model_config, preserving_proto_field_name=True
)
+ def get_config_str(self):
+ """
+ Get the model config json str
+
+ Returns
+ -------
+ str
+ A JSON string containing the model configuration.
+ """
+ return json.dumps(self.get_config())
+
def set_config(self, config):
"""
Set the model config from a dictionary.
diff --git a/model_analyzer/triton/server/server_factory.py b/model_analyzer/triton/server/server_factory.py
index ed0477cc2..3fbf2277a 100755
--- a/model_analyzer/triton/server/server_factory.py
+++ b/model_analyzer/triton/server/server_factory.py
@@ -129,11 +129,11 @@ def get_server_handle(config, gpus, use_model_repository=False):
server = TritonServerFactory._get_remote_server_handle(config)
elif config.triton_launch_mode == "local":
server = TritonServerFactory._get_local_server_handle(
- config, gpus, use_model_repository
+ config, gpus, use_model_repository=True
)
elif config.triton_launch_mode == "docker":
server = TritonServerFactory._get_docker_server_handle(
- config, gpus, use_model_repository
+ config, gpus, use_model_repository=True
)
elif config.triton_launch_mode == "c_api":
server = TritonServerFactory._get_c_api_server_handle(
@@ -180,11 +180,11 @@ def _get_local_server_handle(config, gpus, use_model_repository):
triton_config["model-repository"] = (
config.model_repository
- if use_model_repository
+ if use_model_repository and config.model_repository
else config.output_model_repository_path
)
- if use_model_repository:
+ if use_model_repository and config.model_repository:
triton_config["strict-model-config"] = "false"
triton_config["http-port"] = config.triton_http_endpoint.split(":")[-1]
diff --git a/tests/test_bls_report_manager.py b/tests/test_bls_report_manager.py
index 19053863c..37773418b 100755
--- a/tests/test_bls_report_manager.py
+++ b/tests/test_bls_report_manager.py
@@ -26,7 +26,7 @@
from .common.test_utils import ROOT_DIR, evaluate_mock_config
-class TestBLSReportManagerMethods(trc.TestResultCollector):
+class TestBLSReportManager(trc.TestResultCollector):
def _init_managers(
self,
models="test_model",
diff --git a/tests/test_model_variant_name_manager.py b/tests/test_model_variant_name_manager.py
index b26a223bf..b21e57404 100755
--- a/tests/test_model_variant_name_manager.py
+++ b/tests/test_model_variant_name_manager.py
@@ -21,6 +21,8 @@
ModelVariantNameManager,
)
from model_analyzer.constants import DEFAULT_CONFIG_PARAMS
+from model_analyzer.triton.model.model_config import ModelConfig
+from model_analyzer.triton.model.model_config_variant import ModelConfigVariant
from tests.common.test_utils import default_encode
from .common import test_result_collector as trc
@@ -142,8 +144,8 @@ def test_ensemble_default(self):
"""
Test that a default ensemble config is returned
"""
- sub_configA = {"name": "modelA_config_default"}
- sub_configB = {"name": "modelB_config_default"}
+ sub_configA = ModelConfigVariant(ModelConfig({}), "modelA_config_default")
+ sub_configB = ModelConfigVariant(ModelConfig({}), "modelB_config_default")
ensemble_key = ModelVariantNameManager.make_ensemble_composing_model_key(
[sub_configA, sub_configB]
@@ -159,8 +161,8 @@ def test_ensemble_basic(self):
"""
Test that we can increment the ensemble config numbers
"""
- sub_configA = {"name": "modelA_config_0"}
- sub_configB = {"name": "modelB_config_0"}
+ sub_configA = ModelConfigVariant(ModelConfig({}), "modelA_config_0")
+ sub_configB = ModelConfigVariant(ModelConfig({}), "modelB_config_0")
ensemble_key = ModelVariantNameManager.make_ensemble_composing_model_key(
[sub_configA, sub_configB]
@@ -172,7 +174,7 @@ def test_ensemble_basic(self):
self.assertEqual(name, (False, "ensemble_model_config_0"))
- sub_configB = {"name": "modelB_config_1"}
+ sub_configB = ModelConfigVariant(ModelConfig({}), "modelB_config_1")
ensemble_key = ModelVariantNameManager.make_ensemble_composing_model_key(
[sub_configA, sub_configB]
@@ -184,7 +186,7 @@ def test_ensemble_basic(self):
self.assertEqual(name, (False, "ensemble_model_config_1"))
- sub_configA = {"name": "modelA_config_1"}
+ sub_configA = ModelConfigVariant(ModelConfig({}), "modelA_config_1")
ensemble_key = ModelVariantNameManager.make_ensemble_composing_model_key(
[sub_configA, sub_configB]
@@ -201,8 +203,8 @@ def test_ensemble_repeat(self):
Calling with the same model name/ensemble key multiple times
should result in the same config name being returned
"""
- sub_configA = {"name": "modelA_config_0"}
- sub_configB = {"name": "modelB_config_0"}
+ sub_configA = ModelConfigVariant(ModelConfig({}), "modelA_config_0")
+ sub_configB = ModelConfigVariant(ModelConfig({}), "modelB_config_0")
ensemble_key = ModelVariantNameManager.make_ensemble_composing_model_key(
[sub_configA, sub_configB]
diff --git a/tests/test_quick_run_config_generator.py b/tests/test_quick_run_config_generator.py
index 4c38de6ef..99f4fae65 100755
--- a/tests/test_quick_run_config_generator.py
+++ b/tests/test_quick_run_config_generator.py
@@ -145,7 +145,7 @@ def mock_bls_configs(*args, **kwargs):
class TestQuickRunConfigGenerator(trc.TestResultCollector):
def setUp(self):
fake_config = {
- "name": "fake_model_name1",
+ "name": "fake_model_name",
"input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": [16]}],
"max_batch_size": 4,
}
@@ -292,7 +292,7 @@ def test_get_next_run_config(self):
}
],
"maxBatchSize": 32,
- "name": "fake_model_name_config_0",
+ "name": "fake_model_name",
"input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}],
"optimization": {
"executionAccelerators": {
@@ -362,7 +362,7 @@ def test_get_next_run_config_multi_model(self):
}],
'maxBatchSize': 2,
'sequenceBatching': {},
- 'name': 'fake_model_name1_config_0',
+ 'name': 'fake_model_name1',
'input': [{
"name": "INPUT__0",
"dataType": "TYPE_FP32",
@@ -378,7 +378,7 @@ def test_get_next_run_config_multi_model(self):
'kind': 'KIND_GPU',
}],
'maxBatchSize': 16,
- 'name': 'fake_model_name2_config_0',
+ 'name': 'fake_model_name2',
'input': [{
"name": "INPUT__2",
"dataType": "TYPE_FP16",
@@ -774,7 +774,7 @@ def test_get_next_run_config_max_batch_size(self):
}
],
"maxBatchSize": 16,
- "name": "fake_model_name_config_0",
+ "name": "fake_model_name",
"input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}],
}
# yapf: enable
@@ -828,7 +828,7 @@ def test_get_next_run_config_max_instance_count(self):
}
],
"maxBatchSize": 32,
- "name": "fake_model_name_config_0",
+ "name": "fake_model_name",
"input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}],
}
# yapf: enable
@@ -882,7 +882,7 @@ def test_get_next_run_config_min_batch_size(self):
}
],
"maxBatchSize": 64,
- "name": "fake_model_name_config_0",
+ "name": "fake_model_name",
"input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}],
}
# yapf: enable
@@ -936,7 +936,7 @@ def test_get_next_run_config_min_instance_count(self):
}
],
"maxBatchSize": 32,
- "name": "fake_model_name_config_0",
+ "name": "fake_model_name",
"input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}],
}
# yapf: enable
@@ -1017,7 +1017,7 @@ def _get_next_run_config_ensemble(self, max_concurrency=0, min_concurrency=0):
}],
'maxBatchSize': 2,
'sequenceBatching': {},
- 'name': 'fake_model_A_config_0',
+ 'name': 'fake_model_A',
'input': [{
"name": "INPUT__0",
"dataType": "TYPE_FP32",
@@ -1033,7 +1033,7 @@ def _get_next_run_config_ensemble(self, max_concurrency=0, min_concurrency=0):
'kind': 'KIND_CPU',
}],
'maxBatchSize': 16,
- 'name': 'fake_model_B_config_0',
+ 'name': 'fake_model_B',
'input': [{
"name": "INPUT__2",
"dataType": "TYPE_FP16",
@@ -1184,7 +1184,7 @@ def _get_next_run_config_bls(self, max_concurrency=0, min_concurrency=0):
}],
'maxBatchSize': 2,
'dynamicBatching': {},
- 'name': 'my-model_config_0',
+ 'name': 'my-model',
'platform': 'bls',
'input': [{
"name": "INPUT__0",
@@ -1201,7 +1201,7 @@ def _get_next_run_config_bls(self, max_concurrency=0, min_concurrency=0):
}],
'maxBatchSize': 8,
'sequenceBatching': {},
- 'name': 'fake_model_A_config_0',
+ 'name': 'fake_model_A',
'input': [{
"name": "INPUT__0",
"dataType": "TYPE_FP32",
@@ -1217,7 +1217,7 @@ def _get_next_run_config_bls(self, max_concurrency=0, min_concurrency=0):
'kind': 'KIND_GPU',
}],
'maxBatchSize': 32,
- 'name': 'fake_model_B_config_0',
+ 'name': 'fake_model_B',
'input': [{
"name": "INPUT__2",
"dataType": "TYPE_FP16",
diff --git a/tests/test_results.py b/tests/test_results.py
index 0641946aa..2fc45162b 100755
--- a/tests/test_results.py
+++ b/tests/test_results.py
@@ -177,9 +177,27 @@ def _construct_results(self):
]
self._measurements = []
- self._measurements.append({"-m key_A": "1", "-m key_B": "2", "-m key_C": "3"})
- self._measurements.append({"-m key_D": "4", "-m key_E": "5", "-m key_F": "6"})
- self._measurements.append({"-m key_G": "7", "-m key_H": "8", "-m key_I": "9"})
+ self._measurements.append(
+ {
+ "model_config_0 -m key_A": "1",
+ "model_config_0 -m key_B": "2",
+ "model_config_0 -m key_C": "3",
+ }
+ )
+ self._measurements.append(
+ {
+ "model_config_1 -m key_D": "4",
+ "model_config_1 -m key_E": "5",
+ "model_config_1 -m key_F": "6",
+ }
+ )
+ self._measurements.append(
+ {
+ "model_config_2 -m key_G": "7",
+ "model_config_2 -m key_H": "8",
+ "model_config_2 -m key_I": "9",
+ }
+ )
self._result.add_run_config_measurement(self._run_configs[0], "1")
self._result.add_run_config_measurement(self._run_configs[1], "2")
diff --git a/tests/test_run_config.py b/tests/test_run_config.py
index b0517c8fd..9f63f83ff 100755
--- a/tests/test_run_config.py
+++ b/tests/test_run_config.py
@@ -61,13 +61,22 @@ def test_representation(self):
pc1.update_config({"model-name": "TestModel1"})
pc2 = PerfAnalyzerConfig()
pc2.update_config({"model-name": "TestModel2"})
- mrc1 = ModelRunConfig("model1", MagicMock(), pc1)
- mrc2 = ModelRunConfig("model2", MagicMock(), pc2)
+ mrc1 = ModelRunConfig(
+ "model1", ModelConfigVariant(MagicMock(), "model1_config_0"), pc1
+ )
+ mrc2 = ModelRunConfig(
+ "model2", ModelConfigVariant(MagicMock(), "model2_config_0"), pc2
+ )
rc = RunConfig({})
rc.add_model_run_config(mrc1)
rc.add_model_run_config(mrc2)
- expected_representation = pc1.representation() + pc2.representation()
+ expected_representation = (
+ "model1_config_0 "
+ + pc1.representation()
+ + "model2_config_0 "
+ + pc2.representation()
+ )
self.assertEqual(rc.representation(), expected_representation)
def test_representation_mrc_removal(self):
@@ -78,10 +87,12 @@ def test_representation_mrc_removal(self):
pc.update_config({"model-name": "TestModel1"})
pc.update_config({"measurement-request-count": "500"})
- mrc = ModelRunConfig("model1", MagicMock(), pc)
+ mrc = ModelRunConfig(
+ "model1", ModelConfigVariant(MagicMock(), "model1_config_0"), pc
+ )
- expected_represenation = "-m TestModel1"
- self.assertEqual(mrc.representation(), expected_represenation)
+ expected_representation = "model1_config_0 -m TestModel1"
+ self.assertEqual(mrc.representation(), expected_representation)
def test_cpu_only(self):
"""
diff --git a/tests/test_triton_server_factory.py b/tests/test_triton_server_factory.py
index a1285a29f..2448e89c9 100755
--- a/tests/test_triton_server_factory.py
+++ b/tests/test_triton_server_factory.py
@@ -28,7 +28,10 @@ class TestTritonServerFactory(trc.TestResultCollector):
def setUp(self):
# Mock path validation
self.mock_os = MockOSMethods(
- mock_paths=["model_analyzer.triton.server.server_factory"]
+ mock_paths=[
+ "model_analyzer.triton.server.server_factory",
+ "model_analyzer.config.input.config_utils",
+ ]
)
self.mock_os.start()
@@ -53,6 +56,7 @@ def _test_get_server_handle_helper(
"""
config = ConfigCommandProfile()
+ config.model_repository = "/fake_model_repository"
config.triton_launch_mode = launch_mode
config.triton_http_endpoint = "fake_address:2345"
config.triton_grpc_endpoint = "fake_address:4567"
@@ -61,7 +65,7 @@ def _test_get_server_handle_helper(
expected_http_port = "2345"
expected_grpc_port = "4567"
# Convert seconds to ms
- expected_metrics_interval_ms = config.monitoring_interval * 1000
+ expected_metrics_interval_ms = int(config.monitoring_interval * 1000)
with patch(
"model_analyzer.triton.server.server_factory.TritonServerFactory.create_server_local"